This R markdown file contains all of the analyses for the KOSMOS paper.
# Get world map
world_df <- map_data("world")
# pull out only peru
peru_df <- world_df[world_df$region == "Peru",]
# Load data
peru_spdf <- readOGR(dsn = "/Users/mmin/Documents/KOSMOS_data_local/map_files/PER_adm/PER_adm0.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/mmin/Documents/KOSMOS_data_local/map_files/PER_adm/PER_adm0.shp", layer: "PER_adm0"
## with 1 features
## It has 70 fields
## Integer64 fields read as strings: ID_0 OBJECTID_1
# Convert to df(ish)
peru_spdf_fort <- tidy(peru_spdf)
## Regions defined for each Polygons
peru_map <- ggplot(data = world_df, aes(x = long, y = lat, group = group))+
geom_polygon(fill = "gray60", color = "grey40")+
geom_polygon(data = peru_df,aes(x = long, y = lat, group = group, fill = region), color = "black")+
scale_fill_manual(values = c("white"))+
coord_fixed(xlim = c(-90, -68), ylim = c(-29,1), ratio = 1.3)+
theme(legend.position = "none",
panel.background = element_rect(fill="#c6dbef"),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text = element_text(size = 13,face = "bold"),
axis.title = element_text(size = 15, face = "bold"))+
ylab("Latitude")+
xlab("Longitude")+
scale_x_continuous(breaks = c(-90,-85,-80,-75,-70), expand = c(0,0), labels=c(expression(paste(90*degree,"W")),expression(paste(85*degree,"W")),
expression(paste(80*degree,"W")),expression(paste(75*degree,"W")),
expression(paste(70*degree,"W"))))+
scale_y_continuous(breaks = seq(-25,0,5), expand = c(0,0), labels=c(expression(paste(25*degree,"S")),
expression(paste(20*degree,"S")),expression(paste(15*degree,"S")),
expression(paste(10*degree,"S")),expression(paste(5*degree,"S")),expression(paste(0*degree))))+
annotate("rect", xmin = -77.6, xmax = -76.8, ymin = -12.5, ymax = -11.9,color="firebrick3", fill = NA)+
annotate("text",label = "Peru",x = -75.5, y = -10,size = 8)+
annotate("text", label = " La Punta \n (Callao) ",x = -86, y = -12.2, size = 8)+
geom_segment(aes(x = -83, y = -12.2, xend = -78, yend = -12.2), colour='black', size=1,arrow = arrow(length = unit(0.25, "cm"),type = "closed"))+
# callout to mesocosm setup
annotate("segment",x = -77.6, y = -12.5, xend = -87, yend = -15.5, color = "firebrick3", lty = 2)+
annotate("segment",x = -76.8, y = -12.5, xend = -78, yend = -15.5, color = "firebrick3", lty = 2)+
# mesocosm arrangement - manually because I stink at for loops
# Box
annotate("rect", xmin = -87, xmax = -78, ymin = -15.5, ymax = -27.5,color="firebrick3",fill=NA, lty = 3)+ #background
# Left row
annotate("point", x = -85, y = -17, shape = 21, fill = "white", size = 12, color = "firebrick3",stroke = 2)+ # M1
annotate("text", x = -85, y = -17,label = "M1", color = "black",size = 5)+ # M1
annotate("point", x = -85, y = -20, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M3
annotate("text", x = -85, y = -20,label = "M3", color = "black", alpha = 0.5)+ # M3
annotate("point", x = -85, y = -23, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M5
annotate("text", x = -85, y = -23,label = "M5", color = "black", alpha = 0.5)+ # M5
annotate("point", x = -85, y = -26, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M7
annotate("text", x = -85, y = -26, label = "M7", color = "black", alpha = 0.5)+ # M7
# Right row
annotate("point", x = -80, y = -17, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M2
annotate("text", x = -80, y = -17,label = "M2", color = "black", alpha = 0.5)+ # M2
annotate("point", x = -80, y = -20, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M4
annotate("text", x = -80, y = -20,label = "M4", color = "black", alpha = 0.5)+ # M4
annotate("point", x = -80, y = -23, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M6
annotate("text", x = -80, y = -23,label = "M6", color = "black", alpha = 0.5)+ # M6
annotate("point", x = -80, y = -26, shape = 21, fill = "white", size = 12, stroke = 1, alpha = 0.5)+ # M8
annotate("text", x = -80, y = -26, label = "M8", color = "black", alpha = 0.5)+ # M8
# Pacific
# annotate("point", x = -77.3175, y = -12.0875, shape = 21, fill = "white",color = "white", size = 15, stroke = 1)+ # Pacific
annotate("text", x = -82.5, y = -21.5,label = "bold(Pacific)", color = "darkblue",size = 6, parse = TRUE) # Pacific
peru_map
ggsave("kosmos_map.pdf", width = 5, height = 8,path = "/Users/mmin/Documents/KOSMOS_paper_figures")
M_nutrients <- read.csv(file = "/Users/mmin/Documents/KOSMOS_data_local/kosmos_physical_data/Figure_4_Francisco.csv")
M_chla_long <- read.csv(file = "/Users/mmin/Documents/KOSMOS_data_local/kosmos_physical_data/chla_Francisco.csv")
kosmos_T <- read.csv(file = "/Users/mmin/Documents/KOSMOS_data_local/kosmos_physical_data/kosmos_temperature_all.csv")
kosmos_S <- read.csv(file = "/Users/mmin/Documents/KOSMOS_data_local/kosmos_physical_data/kosmos_salinity_all.csv")
# Convert all five datasets into long form for plots
# Separate out long form data into each nutrient separately
NOx_long <- subset(M_nutrients, parameter %in% c("NOx"))
NOx_long %>% mutate(mesocosm = paste0("M",mesocosm)) %>% mutate(mesocosm = ifelse(mesocosm == "M10","Pacific",mesocosm)) -> NOx_long
SiO4_long <-subset(M_nutrients, parameter %in% c("SiO4"))
SiO4_long %>% mutate(mesocosm = paste0("M",mesocosm)) %>% mutate(mesocosm = ifelse(mesocosm == "M10","Pacific",mesocosm)) -> SiO4_long
rownames(M_chla_long) = NULL
chla_long <- M_chla_long
chla_long %>% mutate(mesocosm = paste0("M",mesocosm)) %>% mutate(mesocosm = ifelse(mesocosm == "M10","Pacific",mesocosm)) -> chla_long
T_long <- kosmos_T %>% gather(.,mesocosm, value, M1:Pacific)
parameter = rep("temp",nrow(T_long))
T_long %>% dplyr::rename(.,sampling.day = day) %>% data.frame(.,parameter) -> T_long
S_long <- kosmos_S %>% gather(.,mesocosm, value, M1:Pacific)
parameter = rep("sal",nrow(S_long))
S_long %>% dplyr::rename(.,sampling.day = day) %>% data.frame(.,parameter) -> S_long
# Concat the different variables, export as one dataframe for inclusion with publication
combined_physicochemical_data <- bind_rows(bind_rows(bind_rows(bind_rows(T_long,S_long),NOx_long),SiO4_long),chla_long)
write.csv(combined_physicochemical_data, "/Users/mmin/Documents/KOSMOS_data_local/kosmos_physical_data/Figure_2_data.csv")
combined_physicochemical_data <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/kosmos_physical_data/Figure_2_data.csv",row.names = 1)
combined_physicochemical_data$mesocosm <- as.character(combined_physicochemical_data$mesocosm)
combined_physicochemical_data$parameter <- as.character(combined_physicochemical_data$parameter)
T_long <- subset(combined_physicochemical_data,parameter == "temp")
S_long <- subset(combined_physicochemical_data,parameter == "sal")
NOx_long <- subset(combined_physicochemical_data,parameter == "NOx")
SiO4_long <- subset(combined_physicochemical_data,parameter == "SiO4")
chla_long <- subset(combined_physicochemical_data,parameter == "chla")
variable_names <- c("NOx","SiO4","chla","T","S")
for (i in seq(1:length(variable_names))){
df <- pivot_wider(eval(parse(text = paste0(variable_names[i],"_long"))),names_from = mesocosm, values_from = value)
colnames(df) <- c("sampling.day","parameter","M1","M2","M3","M4","M5","M6","M7","M8","Pacific")
# convert to long form
df_long <- df %>% gather(.,station, value, M1:M8)
# drop Pacific values
df_long <- df_long[,!(names(df_long) %in% c("Pacific"))]
# Sumamrize data to calculate the mean + SE
df_meanSE_data <- summarySE(df_long, measurevar="value", groupvars=c("sampling.day"),na.rm = TRUE)
# Add in M1 and Pacific to this data
complete_df <- left_join(df[,c("sampling.day", "M1","Pacific")],df_meanSE_data)
# Rename some columns
complete_df %>% dplyr::rename(., M1_M8_mean = value, M1_M8_sd = sd,M1_M8_se = se,M1_M8_ci = ci) -> complete_df
# Save as a dataframe
assign(paste0(variable_names[i],"_df"), complete_df)
}
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
## Joining, by = "sampling.day"
There are a number of “ifelse” statements in this chunk of code which are used to adjust the plot margins in order to align the plots, as well as make the top and bottom (a and e) figures have x-axis labels.
variable_names <- c("T","S","NOx","SiO4","chla")
panel = c("(a)","(b)","(c)","(d)","(e)")
for (i in seq(1:length(variable_names))){
plot <- ggplot(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1))+
geom_line(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1),color = "firebrick3",size = 1.25)+
geom_line(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1_M8_mean),color = "grey50",size = 1)+
geom_line(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "darkblue",size = 1.25)+
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "black",size = 1)+
geom_ribbon(data = eval(parse(text = paste0(variable_names[i],"_df"))),aes(x = sampling.day,ymax = M1_M8_mean+M1_M8_se,ymin = M1_M8_mean-M1_M8_se),fill = "grey50",alpha = 0.5)+
geom_point(data = subset(eval(parse(text = paste0(variable_names[i],"_df"))),sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = Pacific), color = "darkblue", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
geom_point(data = subset(eval(parse(text = paste0(variable_names[i],"_df"))),sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = M1), color = "firebrick3", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
# Add points for sampling points
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1),color = "black",size = 1.5)+
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "black",size = 1.5)+
xlab("Day of Experiment")+
# Three different themes: for top panel, middle three panels, and bottom panel
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text.y = element_text(size = 15,face = "bold"),
axis.text.x.bottom = element_text(size = ifelse(panel[i] %in% c("(e)"),20,0),face = "bold"),
axis.text.x.top = element_text(size = ifelse(panel[i] %in% c("(a)"),20,0),face = "bold"),
axis.title.x.bottom = element_text(size = ifelse(panel[i] %in% c("(e)"),25,0), face = "bold"),
axis.title.x.top = element_text(size = ifelse(panel[i] %in% c("(a)"),25,0), face = "bold"),
axis.title.y = element_text(size = 18, face = "bold",margin = margin(0, ifelse(panel[i] %in% c("(b)"), 1.7,
ifelse(panel[i] %in% c("(a)"), 11.75, 11.5)), 0,0)),
axis.ticks.length.x.bottom = unit(0.25,"cm"),
axis.ticks.length.x.top = unit(ifelse(panel[i] %in% c("(a)"),0.25,0),"cm"),
plot.margin = unit(c(ifelse(panel[i] == "(a)",0,5),0,0,3), "pt"),
legend.position = "none")+
ylim(ifelse(panel[i] %in% c("(c)","(d)","(e)"),0,min(eval(parse(text = paste0(variable_names[i],"_df")))$Pacific)),NA)+
# Add in secondary x axis and labels for temperature plot
scale_x_continuous(sec.axis = sec_axis(~ .,name = "Sample Number", breaks = c(1,8,15,24,32,36,42,48),labels = c(1,2,3,4,5,6,7,8)),limits = c(1,50))+
# geom_text(data = subset(eval(parse(text = paste0(variable_names[i],"_df"))), sampling.day %in% c(1,8,15,24,32,36,42,48)), aes(x = sampling.day, y = 21.28, label = sample_number), size = ifelse(panel[i] %in% c("(a)"),7,0), color = "black")+
annotate("text", x=-Inf, y = Inf, label = paste0(panel[i]), vjust=1.5, hjust=-0.3,size = 12)+
# add vertical lines for omz water addition
geom_vline(xintercept = 11, lty = 2, color = "seagreen", size = 1)+
geom_vline(xintercept = 12, lty = 2, color = "seagreen", size = 1)+
# add vertical lines for NaCl brine addition
geom_vline(xintercept = 13, lty = 2, color = "#ff7f00", size = 1)+
geom_vline(xintercept = 33, lty = 2, color = "#ff7f00", size = 1)+
# Use an ifelse statement to change the y labels
ylab(ifelse(panel[i] == "(a)",expression(bold(~"Temperature" ~ ("°C"))),
ifelse(panel[i] == "(b)",expression(bold(~"Salinity" ~ ("psu"))),
ifelse(panel[i] == "(c)",expression(bold(~"NO"[2] ~ + ~"NO"[3] ~ (mu*"mol L" ^-1)~"\n")),
ifelse(panel[i] == "(d)",expression(bold(~"Si(OH)"[4] ~ (mu*"mol L" ^-1)~"\n")),
ifelse(panel[i] == "(e)",expression(bold(~"chl-a" ~ (mu*"g L" ^-1) ~ "\n")),
"oops you messed up"))))))
ggsave(paste0("/Users/mmin/Documents/KOSMOS_paper_figures/biogeochemical_figure/v4_FINAL/",variable_names[i],"_gg.png"),plot,width = 7, height = ifelse(panel[i] %in% c("(a)","(e)"),3.5,3))
# show plot
plot
}
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
legend_gg <- ggplot(data = T_long, aes(x = Day, y = value, color = variable))+
xlim(30,38)+
ylim(20.23,20.67)+
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.margin = unit(c(0,0,0,58),"pt"))+
annotate("segment", x = 30, xend = 31, y = 20.55, yend = 20.55, color = "firebrick3",size = 1.5)+ #mesocosm
annotate("segment", x = 30, xend = 31, y = 20.35, yend = 20.35, color = "darkblue", size = 1.5)+ #pacific
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 1.5)+ #M1-M8 average
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 5,alpha = 0.5)+ #M1-M8 average SE
annotate("point", x = 35, y = 20.35, shape = 21, colour = "black", fill = "white", size = 7, stroke = 3)+ # eDNA sample
annotate("point", x = 35, y = 20.35, colour = "black", size = 1.5)+ # eDNA sample
annotate("text",label = "Mesocosm 1 (M1)", x = 31.5, y = 20.55,size = 6,adj = 0)+ #mesocosm
annotate("text",label = "Pacific", x = 31.5, y = 20.35, size = 6, adj = 0)+ #pacific
annotate("text",label = "Mean M1-M8", x = 36, y = 20.55, size = 6, adj = 0)+ #M1-M8 average
annotate("text",label = "eDNA Sample", x = 36, y = 20.35, size = 6, adj = 0) #eDNA sample
legend_gg
ggsave("legend_gg.png", width = 7, height = 1,path = "/Users/mmin/Documents/KOSMOS_paper_figures/biogeochemical_figure/v4_FINAL")
FCM_df <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/FCM/KOSMOS2017_FCM_data_final_version_MM_edits.csv")
# Convert to long format (to achieve consistency with biogeochemical data format)
# Gather the five columns that we will plot
gathercols <- c("Synechococcus", "total_Eukaryotes", "cryptophytes", "heterotrophic.bacteria", "small_bacterial_population")
FCM_df_gather <- gather(dplyr::select(FCM_df,-c(Sample,Prochlorococcus,notes)), key = "parameter", value = "value", gathercols, factor_key=TRUE)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(gathercols)` instead of `gathercols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
colnames(FCM_df_gather) <- c("mesocosm","sampling.day","parameter","value")
# Divide by 10000, so all values are in cells/ml x 10^4 (see Worden et al. 2004 for precedent)
FCM_df_gather$value <- FCM_df_gather$value/10000
# Take out only M1 and Pacific, drop DW
FCM_df_gather <- subset(FCM_df_gather, mesocosm %in% c("Pacific","Mesocosm 1"))
FCM_df_gather$mesocosm <- as.character(FCM_df_gather$mesocosm)
FCM_df_gather$parameter <- as.character(FCM_df_gather$parameter)
syn_long <- subset(FCM_df_gather,parameter == "Synechococcus")
euks_long <- subset(FCM_df_gather,parameter == "total_Eukaryotes")
cryp_long <- subset(FCM_df_gather,parameter == "cryptophytes")
het_bact_long <- subset(FCM_df_gather,parameter == "heterotrophic.bacteria")
small_bact_long <- subset(FCM_df_gather,parameter == "small_bacterial_population")
variable_names <- c("syn","euks","cryp","het_bact","small_bact")
for (i in seq(1:length(variable_names))){
df <- pivot_wider(eval(parse(text = paste0(variable_names[i],"_long"))),names_from = mesocosm, values_from = value)
# Remove first sampling point, because Alex and Bente don't trust it (issue with water handling or sampling)
df <- subset(df, sampling.day != 2)
colnames(df) <- c("sampling.day","parameter","Pacific","M1")
df$Pacific <- as.numeric(df$Pacific)
df$M1 <- as.numeric(df$M1)
assign(paste0(variable_names[i],"_df"), df)
}
euks_df
## # A tibble: 22 x 4
## sampling.day parameter Pacific M1
## <int> <chr> <dbl> <dbl>
## 1 4 total_Eukaryotes 0.486 0.586
## 2 6 total_Eukaryotes 0.434 0.707
## 3 8 total_Eukaryotes 0.520 0.442
## 4 10 total_Eukaryotes 0.491 0.170
## 5 12 total_Eukaryotes 0.500 0.265
## 6 15 total_Eukaryotes 1.25 0.514
## 7 16 total_Eukaryotes 0.903 0.688
## 8 17 total_Eukaryotes 0.763 0.613
## 9 22 total_Eukaryotes 0.296 0.529
## 10 26 total_Eukaryotes 0.727 0.91
## # … with 12 more rows
variable_names <- c("syn","euks","cryp","het_bact","small_bact")
panel = c("(f)","(g)","(h)","(i)","(j)")
for (i in seq(1:length(variable_names))){
df = eval(parse(text = paste0(variable_names[i],"_df")))
plot <- ggplot(data = df[!is.na(df$M1),], aes(x = sampling.day, y = M1))+
geom_line(data = df[!is.na(df$M1),], aes(x = sampling.day, y = M1),color = "firebrick3",size = 1.25)+
geom_line(data = df[!is.na(df$Pacific),], aes(x = sampling.day, y = Pacific),color = "darkblue",size = 1.25)+
geom_point(data = subset(df,sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = Pacific), color = "darkblue", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
geom_point(data = subset(df,sampling.day %in% c(1,8,15,24,32,36,42,48)),aes(x = sampling.day, y = M1), color = "firebrick3", shape = 21, fill = "white", size = 3.5, stroke = 2.5)+
# Add points for sampling points
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = M1),color = "black",size = 1.5)+
geom_point(data = eval(parse(text = paste0(variable_names[i],"_df"))), aes(x = sampling.day, y = Pacific),color = "black",size = 1.5)+
xlab("Day of Experiment")+
# Three different themes: for top panel, middle three panels, and bottom panel
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text.y = element_text(size = 15,face = "bold"),
axis.text.x.bottom = element_text(size = ifelse(panel[i] %in% c("(j)"),20,0),face = "bold"),
axis.text.x.top = element_text(size = ifelse(panel[i] %in% c("(f)"),20,0),face = "bold"),
axis.title.x.bottom = element_text(size = ifelse(panel[i] %in% c("(j)"),25,0), face = "bold"),
axis.title.x.top = element_text(size = ifelse(panel[i] %in% c("(f)"),25,0), face = "bold"),
# axis.title.y = element_text(size = 18, face = "bold"),
axis.ticks.length.x.bottom = unit(0.25,"cm"),
axis.ticks.length.x.top = unit(ifelse(panel[i] %in% c("(f)"),0.25,0),"cm"),
plot.margin = unit(c(ifelse(panel[i] == "(f)",0,5),0,0,3), "pt"),
# plot.margin = unit(c(ifelse(panel[i] == "(f)",0,5),0,0,ifelse(panel[i] %in% c("(f)"),12.5,
# ifelse(panel[i] %in% c("(g)"),21,
# ifelse(panel[i] %in% c("(h)"),0,
# ifelse(panel[i] %in% c("(i)"),5,
# ifelse(panel[i] %in% c("(j)"),7.5,999999)))))), "pt"),
axis.title.y = element_text(size = 18, face = "bold",margin = margin(t = ifelse(panel[i] == "(f)",0,5),r = ifelse(panel[i] %in% c("(f)"),12.5,
ifelse(panel[i] %in% c("(g)"),21,
ifelse(panel[i] %in% c("(h)"),0,
ifelse(panel[i] %in% c("(i)"),5,
ifelse(panel[i] %in% c("(j)"),7.5,999999))))),b =0,l =0, "pt")),
legend.position = "none")+
xlim(1,50)+
# ylim(ifelse(panel[i] %in% c("(h)","(i)","(j)"),0,min(df$Pacific)),NA)+
ylim(0,ifelse(panel[i] %in% c("(f)"),45,
ifelse(panel[i] %in% c("(i)"),750, NA)))+
# Add in secondary x axis and labels for temperature plot
scale_x_continuous(sec.axis = sec_axis(~ .,name = "Sample Number", breaks = c(1,8,15,24,32,36,42,48),labels = c(1,2,3,4,5,6,7,8)),limits = c(1,50))+
# geom_text(data = subset(df, sampling.day %in% c(1,8,15,24,32,36,42,48)), aes(x = sampling.day, y = 21.28, label = sample_number), size = ifelse(panel[i] %in% c("(f)"),7,0), color = "black")+
annotate("text", x=-Inf, y = Inf, label = paste0(panel[i]), vjust=1.5, hjust=-0.3,size = 12)+
# add vertical lines for omz water addition
geom_vline(xintercept = 11, lty = 2, color = "seagreen", size = 1)+
geom_vline(xintercept = 12, lty = 2, color = "seagreen", size = 1)+
# add vertical lines for NaCl brine addition
geom_vline(xintercept = 13, lty = 2, color = "#ff7f00", size = 1)+
geom_vline(xintercept = 33, lty = 2, color = "#ff7f00", size = 1)+
# Use an ifelse statement to change the y labels
ylab(ifelse(panel[i] == "(f)",expression(bold(~"Synechococcus")),
ifelse(panel[i] == "(g)",expression(bold(~"Total Eukaryotes")),
ifelse(panel[i] == "(h)",expression(bold(~"Cryptophytes")),
ifelse(panel[i] == "(i)",expression(bold(~"Heterotrophic Bacteria")),
ifelse(panel[i] == "(j)",expression(bold(~"Small Bacteria")),
"oops you messed up"))))))
ggsave(paste0("/Users/mmin/Documents/KOSMOS_paper_figures/biogeochemical_figure/v4_FINAL/",variable_names[i],"_gg.png"),plot,width = 7, height = ifelse(panel[i] %in% c("(f)","(j)"),3.5,3))
}
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing missing values (geom_point).
# Make a "cells/ml" label
cells_ml_legend_gg <- ggplot(data = syn_df, aes(x = sampling.day, y = M1))+
geom_point()+
ylab(expression(bold(~"Cells ml" ^-1 ~ ("x 10"^4) ~ "\n")))+
theme(axis.title.y = element_text(size = 30))
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/biogeochemical_figure/v4_FINAL/cells_ml_legend.png", cells_ml_legend_gg,width = 7,height = 7)
legend_gg <- ggplot(data = T_long, aes(x = Day, y = value, color = variable))+
xlim(30,38)+
ylim(20.23,20.67)+
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=2),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.margin = unit(c(0,0,0,58),"pt"))+
annotate("segment", x = 30, xend = 31, y = 20.55, yend = 20.55, color = "firebrick3",size = 1.5)+ #mesocosm
annotate("segment", x = 30, xend = 31, y = 20.35, yend = 20.35, color = "darkblue", size = 1.5)+ #pacific
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 1.5)+ #M1-M8 average
annotate("segment", x = 34.5, xend = 35.5, y = 20.55, yend = 20.55, color = "grey50", size = 5,alpha = 0.5)+ #M1-M8 average SE
annotate("point", x = 35, y = 20.35, shape = 21, colour = "black", fill = "white", size = 7, stroke = 3)+ # eDNA sample
annotate("text",label = "Mesocosm 1 (M1)", x = 31.5, y = 20.55,size = 6,adj = 0)+ #mesocosm
annotate("text",label = "Pacific", x = 31.5, y = 20.35, size = 6, adj = 0)+ #pacific
annotate("text",label = "Mean M1-M8", x = 36, y = 20.55, size = 6, adj = 0)+ #M1-M8 average
annotate("text",label = "eDNA Sample", x = 36, y = 20.35, size = 6, adj = 0) #eDNA sample
legend_gg
ggsave("legend_gg.png", width = 7, height = 1,path = "/Users/mmin/Documents/KOSMOS_paper_figures")
We will be reading in the master datasets from MBON and subsetting out only the KOSMOS project.
metadata <- read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/COI/for_MBON/decontaminated/072720/COI_master_metadata.csv")
metadata$sample_name <- as.character(metadata$sample_name)
## Depth
# Change depth to character so that the mutate line will work
metadata$depth <- as.character(metadata$depth)
# Change surface to 1 m, bottom to 20 m for Florida samples
metadata %>% mutate(.,depth = ifelse(depth %in% c("Surface","SUR"),"1",
ifelse(depth %in% c("Bottom","BOT"),"20",depth))) -> metadata
metadata$depth <- as.numeric(as.character(metadata$depth))
## Warning: NAs introduced by coercion
# Flip the depths for the 67-60 samples in the C3PO19 cruise
metadata$depth <- as.numeric(as.character(metadata$depth))
metadata %>% mutate(.,depth = ifelse(metadata$sample_name %in% c("C3PO19c28_12_eDNA_GG"),150,
ifelse(metadata$sample_name %in% c("C3PO19c28_9_eDNA_GG"),100,
ifelse(metadata$sample_name %in% c("C3PO19c28_6_eDNA_GG"),60,
ifelse(metadata$sample_name %in% c("C3PO19c28_4_eDNA_GG"),20,
ifelse(metadata$sample_name %in% c("C3PO19c28_3_eDNA_GG"),0,depth)))))) -> metadata
# Change depth from factor to numeric
metadata$depth <- as.numeric(as.character(metadata$depth))
## Add in the depth metadata for the Hawaii + Aku samples - in email from Kris Walz on 4/24/20
metadata %>% mutate(depth = ifelse(sample_name == "HI300_J", 300,
ifelse(sample_name == "HI650_J", 650,
ifelse(sample_name == "HI900_J", 900, depth)))) -> metadata
# AKU samples
## Sampling device
netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
netfilt_samples <- subset(metadata, metadata$sample_name %in% metadata$sample_name[netfilts])
netfilt_sample_names <- netfilt_samples$sample_name
metadata_updated <- metadata %>%
mutate(.,sampling_device = ifelse(sample_type %in% c("positive","negative","blank"),"CONTROL",
ifelse(sample_name %in% netfilt_sample_names,"Net filtrate",
ifelse(SAMPLING_platform %in% c("WESTERN FLYER", "RACHEL CARSON", "FULMAR", "WESTERNFLYER"," WESTERN FLYER", "POINT LOBOS", "Point Lobos",
"Point lobos", "Point Sur", "McArthur II", "JohnMartin","JOHN MARTIN","POINT SUR","rachel carson","point lobos","McArthur II", "Western Flyer", "Fulmar", "Rachel Carson", "Rachel carson","CPF","POINT 0"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("REUBEN LASKER"),"Net filtrate",
ifelse(SAMPLING_platform %in% c("DocRicketts"),"ROV",
ifelse(SAMPLING_platform %in% c("Mesocosm"),"Mesocosm",
ifelse(SAMPLING_platform %in% c("Tethys","MAKAI","DORADO","AKU","Dorado389"),"3G ESP",
ifelse(samp_collection_device %in% c("CTD/SBE911 Rosette with Niskin bottle"),"Shipboard Niskin",
ifelse(SAMPLE_Collection_Device %in% c("CTD/SBE911 Rosette with Niskin bottle","CTD/SBE911_Rosette_with_Niskin_bottle"),"Shipboard Niskin",
ifelse(samp_collection_device %in% c("SCUBA diver/bottle"),"SCUBA diver",
ifelse(samp_collection_device %in% c("Methot_net"),"Net filtrate",
ifelse(sample_name %in% c("CN18Fc27_bongo__GG","CN18Fc38_bongo__GG","CN18Fc43_bongo_GG"),"Net filtrate",
metadata$samp_collection_device)))))))))))))
#metadata_updated$sampling_device
## Project
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$description <- as.character(metadata_updated$description)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN18S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(description %in% c("")),description,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
project_name)))) -> metadata_updated
## Location
metadata_updated$geo_loc_name <- as.character(metadata_updated$geo_loc_name)
## Depth as categorical
#sort(metadata_updated$depth)
metadata_updated %>% mutate(.,depth_cat = ifelse(metadata_updated$depth >= 0 & metadata_updated$depth <= 5,"0-5",
ifelse(metadata_updated$depth > 5 & metadata_updated$depth < 20,"6-19",
ifelse(metadata_updated$depth >= 20 & metadata_updated$depth < 29.5,"20-29",
ifelse(metadata_updated$depth >= 29.5 & metadata_updated$depth < 40,"30-39",
ifelse(metadata_updated$depth >= 40 & metadata_updated$depth < 60,"40-59",
ifelse(metadata_updated$depth >= 60 & metadata_updated$depth <= 80,"60-80",
ifelse(metadata_updated$depth >= 100 & metadata_updated$depth <= 199,"100-199",
ifelse(metadata_updated$depth > 199 & metadata_updated$depth <= 250,"200-250",
ifelse(metadata_updated$depth >= 280 & metadata_updated$depth <= 500,"280-500",
ifelse(metadata_updated$SAMPLING_platform %in% c("Mesocosm"),"0-5",
ifelse(is.na(metadata_updated$depth),"unknown",
"600-1565")))))))))))) -> metadata_updated
metadata_updated$depth_cat <- factor(metadata_updated$depth_cat,levels = c("0-5","6-19","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-1565"))
metadata_updated <- metadata_updated
# Use lubridate to convert date/time to posixct
metadata_updated$SAMPLING_date_time <- as.character(metadata_updated$SAMPLING_date_time)
metadata_updated %>% mutate(.,date_time_updated = parse_date_time(SAMPLING_date_time,c("ymdHM","mdyHM","ymdHMS"))) -> metadata_updated
## Warning: 1 failed to parse.
class(metadata_updated$date_time_updated)
## [1] "POSIXct" "POSIXt"
# Let's add in a month field, so we can try to visualize seasonality
metadata_updated %>% mutate(.,month_updated = month(date_time_updated,label = TRUE, abbr = FALSE)) -> metadata_updated
metadata_updated$month_updated <- as.character(metadata_updated$month_updated)
metadata_updated %>% mutate(.,month_updated = ifelse(!(is.na(month_updated)),month_updated,
ifelse(month %in% c("Jan"),"January",
ifelse(month %in% c("Feb"),"February",
ifelse(Month == "March" | month %in% c("Mar"),"March",
ifelse(month %in% c("Apr"),"April",
ifelse(Month == "May" | month %in% c("May"),"May",
ifelse(month %in% c("Jun"),"June",
ifelse(month %in% c("Jul"),"July",
ifelse(month %in% c("Aug"),"August",
ifelse(month %in% c("Sep"),"September",
ifelse(month %in% c("Oct"),"October",
ifelse(month %in% c("Nov"),"November",
ifelse(month %in% c("Dec"),"December",month_updated))))))))))))))-> metadata_updated
# Let's add a season field
metadata_updated %>% mutate(.,season = ifelse(month_updated %in% c("November","December","January"),"Winter",
ifelse(month_updated %in% c("February","March","April"),"Spring",
ifelse(month_updated %in% c("May","June","July"),"Summer",
ifelse(month_updated %in% c("August","September","October"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season <- factor(metadata_updated$season, levels = c("Winter","Spring","Summer","Fall"))
# Let's add a different season field - using the "conventional" seasons
metadata_updated %>% mutate(.,season_conv = ifelse(month_updated %in% c("January", "February", "March"),"Winter",
ifelse(month_updated %in% c("April", "May", "June"),"Spring",
ifelse(month_updated %in% c("July", "August", "September"),"Summer",
ifelse(month_updated %in% c("October", "November", "December"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season_conv <- factor(metadata_updated$season_conv, levels = c("Winter","Spring","Summer","Fall"))
metadata_updated$month_updated <- factor(metadata_updated$month_updated, levels = c("January", "February", "March", "April", "May", "June","July","August","September","October","November","December"))
# Let's add a year field
metadata_updated %>% mutate(.,year_updated = year(date_time_updated)) -> metadata_updated
# Drop 67-70 for the C3PO19 cruise - this is the cast where we the filters got mixed up
metadata_updated <- subset(metadata_updated, !(SAMPLING_station %in% c("67-70") & SAMPLING_cruise == "C3PO19"))
# Take out all non-environmental samples
metadata_updated <- subset(metadata_updated, sample_type %in% c("environmental"))
# Take out the non-eDNA samples from the Florida zooplankton study
metadata_updated <- subset(metadata_updated, !(type_sample %in% c("pre_filt","tissue","extr_blank","filt_blank")))
# Drop netfilt samples
metadata_updated <- subset(metadata_updated, !(sample_name %in% netfilt_sample_names))
# Move sample names to rownames for compatibility with DEICODE
metadata_updated <- metadata_updated[!(is.na(metadata_updated$sample_name)),]
rownames(metadata_updated) <- NULL
metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
asv_table <- read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/COI/for_MBON/decontaminated/072720/COI_master_ASV_table.csv",row.names = 1, check.names = FALSE)
destroyX = function(es) {
f = es
for (col in c(1:ncol(f))){ #for each column in dataframe
if (startsWith(colnames(f)[col], "X") == TRUE) { #if starts with 'X' ..
colnames(f)[col] <- substr(colnames(f)[col], 2, 100) #get rid of it
}
}
assign(deparse(substitute(es)), f, inherits = TRUE) #assign corrected data to original name
}
asv_table <- destroyX(asv_table)
asv_table <- otu_table(asv_table,taxa_are_rows = TRUE)
sample_data <- sample_data(metadata_updated)
tax_table <- tax_table(as.matrix(read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/COI/for_MBON/decontaminated/072720/COI_master_tax_table.csv",row.names = 1)))
master_phyloseq_COI <- merge_phyloseq(asv_table,sample_data,tax_table)
## Decontaminate
# no humans
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Hominidae"
)
# no cows
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Bovidae"
)
# no pigs
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Suidae"
)
# no dogs
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Canidae"
)
# no chickens
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Genus != "Gallus"
)
# no turkeys
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Genus != "Meleagris"
)
# no cats
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Family != "Felidae"
)
# no rats!! and other rodents
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Order != "Rodentia"
)
# no rabbits
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Order != "Lagomorpha"
)
# no insects
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Class != "Insecta"
)
# no arachnids
master_phyloseq_COI <- master_phyloseq_COI %>%
subset_taxa(
Class != "Arachnida"
)
master_phyloseq_COI <- prune_taxa(taxa_sums(master_phyloseq_COI) > 0,master_phyloseq_COI)
master_phyloseq_COI
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 66679 taxa and 968 samples ]
## sample_data() Sample Data: [ 968 samples by 141 sample variables ]
## tax_table() Taxonomy Table: [ 66679 taxa by 7 taxonomic ranks ]
# Subset out KOSMOS samples
kosmos_COI_phyloseq <- prune_samples(sample_data(master_phyloseq_COI)$SAMPLING_project %in% c("KOSMOS"),master_phyloseq_COI)
kosmos_COI_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_phyloseq)>0,kosmos_COI_phyloseq)
# Export metadata to share with John Zicker
write.csv(as.matrix(sample_data(kosmos_COI_phyloseq)), "/Users/mmin/Documents/KOSMOS_data_local/kosmos_metadata_082720.csv")
kosmos_COI_16_phyloseq <- prune_samples(sample_data(kosmos_COI_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), kosmos_COI_phyloseq)
kosmos_COI_16_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_16_phyloseq)>0,kosmos_COI_16_phyloseq)
kosmos_COI_asv_table <- as.data.frame(as(otu_table(kosmos_COI_phyloseq),"matrix"))
rownames_to_column(kosmos_COI_asv_table,"ASV") -> kosmos_COI_asv_table
kosmos_COI_tax_table <- as.data.frame(as(tax_table(kosmos_COI_phyloseq),"matrix"))
rownames_to_column(kosmos_COI_tax_table,"ASV") -> kosmos_COI_tax_table
kosmos_COI <- left_join(kosmos_COI_asv_table, kosmos_COI_tax_table, by = "ASV")
# write.csv(kosmos_COI, "/Users/mmin/Documents/KOSMOS_data_local/kosmos_COI_061620.csv")
metadata <- read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/18S/for_MBON/decontaminated/072720/18S_master_metadata.csv")
metadata$sample_name <- as.character(metadata$sample_name)
## Depth
# Change depth to character so that the mutate line will work
metadata$depth <- as.character(metadata$depth)
# Change surface to 1 m, bottom to 20 m for Florida samples
metadata %>% mutate(.,depth = ifelse(depth %in% c("Surface","SUR"),"1",
ifelse(depth %in% c("Bottom","BOT"),"20",depth))) -> metadata
metadata$depth <- as.numeric(as.character(metadata$depth))
# Flip the depths for the 67-60 samples in the C3PO19 cruise
metadata$depth <- as.numeric(as.character(metadata$depth))
metadata %>% mutate(.,depth = ifelse(metadata$sample_name %in% c("C3PO19c28_12_eDNA_HH"),150,
ifelse(metadata$sample_name %in% c("C3PO19c28_9_eDNA_HH"),100,
ifelse(metadata$sample_name %in% c("C3PO19c28_6_eDNA_HH"),60,
ifelse(metadata$sample_name %in% c("C3PO19c28_4_eDNA_HH"),20,
ifelse(metadata$sample_name %in% c("C3PO19c28_3_eDNA_HH"),0,depth)))))) -> metadata
## Sampling device
netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
netfilt_samples <- metadata$sample_name[netfilts]
metadata_updated <- metadata %>%
mutate(.,sampling_device = ifelse(sample_type %in% c("positive","negative","blank"),"CONTROL",
ifelse(sample_name %in% netfilt_samples,"Net filtrate",
ifelse(SAMPLING_platform %in% c("WESTERN FLYER", "RACHEL CARSON", "FULMAR", "WESTERNFLYER"," WESTERN FLYER", "POINT LOBOS", "Point Lobos","Shimada",
"Point lobos", "Point Sur", "McArthur II", "JohnMartin","JOHN MARTIN","POINT SUR","rachel carson","point lobos","McArthur II", "Western Flyer", "Fulmar", "Rachel Carson", "Rachel carson","CPF","POINT 0"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("REUBEN LASKER"),"Net filtrate",
ifelse(SAMPLING_platform %in% c("DocRicketts"),"ROV",
ifelse(SAMPLING_platform %in% c("Mesocosm"),"Mesocosm",
ifelse(depth %in% c("SUR","BOT"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("Tethys","MAKAI","DORADO","AKU","Dorado"),"3G ESP",
ifelse(samp_collection_device %in% c("CTD/SBE911 Rosette with Niskin bottle"),"Shipboard Niskin",
ifelse(SAMPLE_Collection_Device %in% c("CTD/SBE911 Rosette with Niskin bottle"),"Shipboard Niskin",
ifelse(samp_collection_device %in% c("SCUBA diver/bottle"),"SCUBA diver",
ifelse(samp_collection_device %in% c("Methot_net"),"Net filtrate",
ifelse(sample_name %in% c("CN18Fc27_bongo_","CN18Fc38_bongo_","CN18Fc43_bongo"),"Net filtrate",
metadata$samp_collection_device))))))))))))))
# Change depth to numeric
metadata_updated$depth <- as.numeric(as.character(metadata_updated$depth))
## Project
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$description <- as.character(metadata_updated$description)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated$PROJECT <- as.character(metadata_updated$PROJECT)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN18S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(description %in% c("")),description,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
ifelse(!(PROJECT %in% c("")), PROJECT,
project_name))))) -> metadata_updated
## Location
metadata_updated$geo_loc_NAme <- as.character(metadata_updated$geo_loc_NAme)
metadata_updated$geo_loc_name <- as.character(metadata_updated$geo_loc_name)
metadata_updated %>% mutate(.,geo_loc_name = ifelse(geo_loc_name %in% c(""),geo_loc_NAme,geo_loc_name)) -> metadata_updated
# Fix the FKNMS geo_loc_name
metadata_updated %>% mutate(.,geo_loc_name = ifelse(depth %in% c("SUR","BOT"),"USA:Florida:Keys",geo_loc_name)) -> metadata_updated
metadata_updated %>% mutate(.,geo_loc_name = ifelse(libraryID %in% c("M"),"USA:Florida:Keys",geo_loc_name)) -> metadata_updated
# Add in depth for DW addition sample from KOSMOS
metadata_updated["KOSMOSD15_DW2_UU",]$depth <- 30
# Create categorical depth field
metadata_updated %>% mutate(.,depth_cat = ifelse(metadata_updated$depth >= 0 & metadata_updated$depth <= 5.5,"0-5",
ifelse(metadata_updated$depth > 5.5 & metadata_updated$depth <= 20,"6-20",
ifelse(metadata_updated$depth > 20 & metadata_updated$depth < 29.5,"20-29",
ifelse(metadata_updated$depth >= 29.5 & metadata_updated$depth < 40,"30-39",
ifelse(metadata_updated$depth >= 40 & metadata_updated$depth < 60,"40-59",
ifelse(metadata_updated$depth >= 60 & metadata_updated$depth <= 80,"60-80",
ifelse(metadata_updated$depth >= 100 & metadata_updated$depth <= 199,"100-199",
ifelse(metadata_updated$depth > 199 & metadata_updated$depth <= 250,"200-250",
ifelse(metadata_updated$depth >= 280 & metadata_updated$depth <= 700,"280-700",
ifelse(metadata_updated$SAMPLING_platform %in% c("Mesocosm"),"0-5",
"600-700"))))))))))) -> metadata_updated
# Use lubridate to convert date/time to posixct
# First consolidate all date/time information into one column
metadata_updated$collection_date <- as.character(metadata_updated$collection_date)
metadata_updated$SAMPLING_date_time <- as.character(metadata_updated$SAMPLING_date_time)
metadata_updated %>% mutate(., SAMPLING_date_time = ifelse(is.na(SAMPLING_date_time) | SAMPLING_date_time == "", collection_date, SAMPLING_date_time)) -> metadata_updated
metadata_updated %>% mutate(.,date_time_updated = parse_date_time(SAMPLING_date_time,c("ymdHM","mdyHM","ymdHMS"))) -> metadata_updated
## Warning: 12 failed to parse.
# Let's add in a month field, so we can try to visualize seasonality
metadata_updated %>% mutate(.,month_updated = month(date_time_updated,label = TRUE, abbr = FALSE)) -> metadata_updated
metadata_updated$month_updated <- as.character(metadata_updated$month_updated)
# consolidate two month columns
metadata_updated$month <- as.character(metadata_updated$month)
metadata_updated$Month <- as.character(metadata_updated$Month)
metadata_updated %>% mutate(.,month = ifelse(is.na(month) | month == "",Month,month)) -> metadata_updated
metadata_updated %>% mutate(.,month_updated = ifelse(!(is.na(month_updated)),month_updated,
ifelse(month %in% c("Jan"),"January",
ifelse(month %in% c("Feb"),"February",
ifelse(month %in% c("Mar", "March"),"March",
ifelse(month %in% c("Apr"),"April",
ifelse(month %in% c("May"),"May",
ifelse(month %in% c("Jun"),"June",
ifelse(month %in% c("Jul"),"July",
ifelse(month %in% c("Aug"),"August",
ifelse(month %in% c("Sep"),"September",
ifelse(month %in% c("Oct"),"October",
ifelse(month %in% c("Nov"),"November",
ifelse(month %in% c("Dec"),"December",month_updated))))))))))))))-> metadata_updated
# Let's add a season field
metadata_updated %>% mutate(.,season = ifelse(month_updated %in% c("November","December","January"),"Winter",
ifelse(month_updated %in% c("February","March","April"),"Spring",
ifelse(month_updated %in% c("May","June","July"),"Summer",
ifelse(month_updated %in% c("August","September","October"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season <- factor(metadata_updated$season, levels = c("Winter","Spring","Summer","Fall"))
# Let's add a different season field - using the "conventional" seasons
metadata_updated %>% mutate(.,season_conv = ifelse(month_updated %in% c("January", "February", "March"),"Winter",
ifelse(month_updated %in% c("April", "May", "June"),"Spring",
ifelse(month_updated %in% c("July", "August", "September"),"Summer",
ifelse(month_updated %in% c("October", "November", "December"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season_conv <- factor(metadata_updated$season_conv, levels = c("Winter","Spring","Summer","Fall"))
metadata_updated$month_updated <- factor(metadata_updated$month_updated, levels = c("January", "February", "March", "April", "May", "June","July","August","September","October","November","December"))
# Fix the location for the FGNMS samples
metadata_updated %>% mutate(.,geo_loc_name = ifelse(metadata_updated$sample_name %in% c("MBON301_M","MBON302_M","MBON303_M","MBON304_M","MBON305_M","MBON306_M"),"USA:Texas:Flower Garden Banks",geo_loc_name)) -> metadata_updated
# Let's add a year field
metadata_updated %>% mutate(.,year_updated = year(date_time_updated)) -> metadata_updated
# Drop 67-70 for the C3PO19 cruise - this is the cast where the samples got mixed up
metadata_updated <- subset(metadata_updated, !(SAMPLING_station %in% c("67-70") & SAMPLING_cruise == "C3PO19"))
# Take out the non-eDNA samples from FK zooplankton study
metadata_updated <- subset(metadata_updated, !(type_sample %in% c("pre_filt","tissue","extr_blank","filt_blank")))
metadata_updated <- subset(metadata_updated, sample_type %in% c("environment","environmental"))
# Drop netfilt samples
metadata_updated <- subset(metadata_updated, !(sample_name %in% netfilt_sample_names))
metadata_updated <- subset(metadata_updated, !(sampling_device %in% c("Net filtrate")))
### Move sample names to rownames for export
metadata_updated <- metadata_updated[!(is.na(metadata_updated$sample_name)),]
rownames(metadata_updated) <- NULL
metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
asv_table <- read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/18S/for_MBON/decontaminated/072720/18S_master_ASV_table.csv",row.names = 1, check.names = FALSE)
destroyX = function(es) {
f = es
for (col in c(1:ncol(f))){ #for each column in dataframe
if (startsWith(colnames(f)[col], "X") == TRUE) { #if starts with 'X' ..
colnames(f)[col] <- substr(colnames(f)[col], 2, 100) #get rid of it
}
}
assign(deparse(substitute(es)), f, inherits = TRUE) #assign corrected data to original name
}
asv_table <- destroyX(asv_table)
tax_table_df <- read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/18S/for_MBON/decontaminated/072720/18S_master_tax_table.csv")
tax_table_df$Kingdom <- as.character(tax_table_df$Kingdom)
tax_table_df$Phylum <- as.character(tax_table_df$Phylum)
tax_table_df$Class <- as.character(tax_table_df$Class)
tax_table_df$Order <- as.character(tax_table_df$Order)
tax_table_df$Family <- as.character(tax_table_df$Family)
tax_table_df$Genus <- as.character(tax_table_df$Genus)
tax_table_df$Species <- as.character(tax_table_df$Species)
# Mutate Class
tax_table_df %>% mutate(.,Class = ifelse(tax_table_df$Species %in% c("Acantharian sp. 6201"),"Acantharea",
ifelse(tax_table_df$Order %in% c("Diplonemea"), "Diplonemea",
ifelse(tax_table_df$Order %in% c("Choanoflagellata"),"Choanoflagellatea",
tax_table_df$Class)))) -> tax_table_df
tax_table_df <- column_to_rownames(tax_table_df,"ASV")
asv_table <- otu_table(asv_table,taxa_are_rows = TRUE)
sample_data <- sample_data(metadata_updated)
tax_table <- tax_table(as.matrix(tax_table_df))
master_phyloseq_18S <- merge_phyloseq(asv_table,sample_data,tax_table)
## Decontaminate
# no humans
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Hominidae"
)
# no cows
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Bovidae"
)
# no pigs
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Suidae"
)
# no dogs
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Canidae"
)
# no chickens
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Genus != "Gallus"
)
# no turkeys
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Genus != "Meleagris"
)
# no cats
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Family != "Felidae"
)
# no rats!! and other rodents
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Order != "Rodentia"
)
# no rabbits
master_phyloseq_18S <- master_phyloseq_18S %>%
subset_taxa(
Order != "Lagomorpha"
)
master_phyloseq_18S <- prune_taxa(taxa_sums(master_phyloseq_18S) > 0,master_phyloseq_18S)
master_phyloseq_18S
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 29262 taxa and 1020 samples ]
## sample_data() Sample Data: [ 1020 samples by 149 sample variables ]
## tax_table() Taxonomy Table: [ 29262 taxa by 7 taxonomic ranks ]
kosmos_18S_phyloseq <- prune_samples(sample_data(master_phyloseq_18S)$SAMPLING_project %in% c("KOSMOS"),master_phyloseq_18S)
kosmos_18S_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_phyloseq)>0,kosmos_18S_phyloseq)
kosmos_18S_16_phyloseq <- prune_samples(sample_data(kosmos_18S_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), kosmos_18S_phyloseq)
kosmos_18S_16_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_16_phyloseq)>0,kosmos_18S_16_phyloseq)
kosmos_18S_asv_table <- as.data.frame(as(otu_table(kosmos_18S_phyloseq),"matrix"))
rownames_to_column(kosmos_18S_asv_table,"ASV") -> kosmos_18S_asv_table
kosmos_18S_tax_table <- as.data.frame(as(tax_table(kosmos_18S_phyloseq),"matrix"))
rownames_to_column(kosmos_18S_tax_table,"ASV") -> kosmos_18S_tax_table
kosmos_18S <- left_join(kosmos_18S_asv_table, kosmos_18S_tax_table, by = "ASV")
# write.csv(kosmos_18S, "/Users/mmin/Documents/KOSMOS_data_local/kosmos_18S_072720.csv")
metadata <- read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/12S/for_MBON/decontaminated/092120/12S_master_metadata.csv")
metadata$sample_name <- as.character(metadata$sample_name)
metadata <- subset(metadata, sample_type %in% c("environmental"))
# Drop 67-70 for the C3PO19 cruise
metadata <- subset(metadata, !(SAMPLING_station %in% c("67-70") & SAMPLING_cruise == "C3PO19"))
# Flip the depths for station 67-60 to fix them.
metadata$depth <- as.numeric(as.character(metadata$depth))
metadata %>% mutate(.,depth = ifelse(metadata$sample_name %in% c("C3PO19c28_12_eDNA_QQ"),150,
ifelse(metadata$sample_name %in% c("C3PO19c28_9_eDNA_QQ"),100,
ifelse(metadata$sample_name %in% c("C3PO19c28_6_eDNA_QQ"),60,
ifelse(metadata$sample_name %in% c("C3PO19c28_4_eDNA_QQ"),20,
ifelse(metadata$sample_name %in% c("C3PO19c28_3_eDNA_QQ"),0,depth)))))) -> metadata
# Drop 67-70
metadata <- subset(metadata, !(SAMPLING_station %in% c("67-70")))
## Create a new field for sampling device, since the metadata has different formats, but always has a SAMPLING_platform field that we can use to determine what the sampling device was.
# netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
# netfilt_samples <- metadata$sample_name[netfilts]
netfilts <- grepl('netfilt|bongo|Bongo', metadata$sample_name)
netfilt_samples <- subset(metadata, metadata$sample_name %in% metadata$sample_name[netfilts])
netfilt_sample_names <- netfilt_samples$sample_name
metadata_updated <- metadata %>%
mutate(.,sampling_device = ifelse(sample_type %in% c("positive","negative","blank"),"CONTROL",
ifelse(sample_name %in% netfilt_sample_names,"Net filtrate",
ifelse(SAMPLING_platform %in% c("WESTERN FLYER","MCARTHUR II", "RACHEL CARSON", "FULMAR", "WESTERNFLYER"," WESTERN FLYER", "POINT LOBOS", "Point Lobos","Point lobos", "Point Sur", "McArthur II", "JohnMartin","JOHN MARTIN","POINT SUR","rachel carson","point lobos","McArthur II", "Western Flyer", "Fulmar", "Rachel Carson", "Rachel carson","CPF","POINT 0"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("REUBEN LASKER"),"Net filtrate",
ifelse(SAMPLING_cruise %in% c("Flyer2018", "Lasker2018"),"Shipboard Niskin",
ifelse(SAMPLING_platform %in% c("DocRicketts"),"ROV",
ifelse(SAMPLING_platform %in% c("Mesocosm"),"Mesocosm",
ifelse(SAMPLING_platform %in% c("SCUBA") | SAMPLING_platform_type %in% c("scuba"),"SCUBA",
ifelse(SAMPLING_platform %in% c("Tethys","MAKAI","DORADO","AKU"),"3G ESP",metadata$sample_type))))))))))
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN12S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
project_name))) -> metadata_updated
## Project
metadata_updated$SAMPLING_cruise <- as.character(metadata_updated$SAMPLING_cruise)
metadata_updated$SAMPLING_campaign <- as.character(metadata_updated$SAMPLING_campaign)
metadata_updated$project_name <- as.character(metadata_updated$project_name)
metadata_updated %>% mutate(.,project_new = ifelse(SAMPLING_cruise %in% c("CANON11","CANON13","CANON15","CANON16","CANON17S","CN12S","CN18F","C3PO19","CN19S","Lasker2018","Flyer2018"),SAMPLING_cruise,
ifelse(!(SAMPLING_campaign %in% c("")), SAMPLING_campaign,
project_name))) -> metadata_updated
## Location
metadata_updated$geo_loc_name <- as.character(metadata_updated$geo_loc_name)
# Drop netfilt samples
metadata_updated <- subset(metadata_updated, !(sample_name %in% netfilt_sample_names))
metadata_updated <- subset(metadata_updated, !(sampling_device %in% c("Net filtrate")))
## Depth as categorical
# sort(metadata_updated$depth)
metadata_updated %>% mutate(.,depth_cat = ifelse(metadata_updated$depth >= 0 & metadata_updated$depth <= 5.5,"0-5",
ifelse(metadata_updated$depth > 5.5 & metadata_updated$depth <= 20,"6-20",
ifelse(metadata_updated$depth > 20 & metadata_updated$depth < 29.5,"20-29",
ifelse(metadata_updated$depth >= 29.5 & metadata_updated$depth < 40,"30-39",
ifelse(metadata_updated$depth >= 40 & metadata_updated$depth < 60,"40-59",
ifelse(metadata_updated$depth >= 60 & metadata_updated$depth <= 80,"60-80",
ifelse(metadata_updated$depth >= 100 & metadata_updated$depth <= 199,"100-199",
ifelse(metadata_updated$depth > 199 & metadata_updated$depth <= 250,"200-250",
ifelse(metadata_updated$depth >= 280 & metadata_updated$depth <= 500,"280-500",
ifelse(metadata_updated$depth >= 600 & metadata_updated$depth <= 700,"600-700",
"unknown"))))))))))) -> metadata_updated
metadata_updated$depth_cat <- factor(metadata_updated$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
# Use lubridate to convert date/time to posixct
metadata_updated$SAMPLING_date_time <- as.character(metadata_updated$SAMPLING_date_time)
metadata_updated %>% mutate(.,date_time_updated = parse_date_time(SAMPLING_date_time,c("ymdHM","mdyHM","ymdHMS"))) -> metadata_updated
class(metadata_updated$date_time_updated)
## [1] "POSIXct" "POSIXt"
# Let's add in a month field, so we can try to visualize seasonality
metadata_updated %>% mutate(.,month_updated = month(date_time_updated,label = TRUE, abbr = FALSE)) -> metadata_updated
metadata_updated$month_updated <- as.character(metadata_updated$month_updated)
# Let's add a season field
metadata_updated %>% mutate(.,season = ifelse(month_updated %in% c("November","December","January"),"Winter",
ifelse(month_updated %in% c("February","March","April"),"Spring",
ifelse(month_updated %in% c("May","June","July"),"Summer",
ifelse(month_updated %in% c("August","September","October"),"Fall",month_updated))))) -> metadata_updated
metadata_updated$season <- factor(metadata_updated$season, levels = c("Winter","Spring","Summer","Fall"))
metadata_updated$month_updated <- factor(metadata_updated$month_updated, levels = c("January", "February", "March", "April", "May", "June","July","August","September","October","November","December"))
# Let's add a year field
metadata_updated %>% mutate(.,year_updated = year(date_time_updated)) -> metadata_updated
metadata_updated <- metadata_updated[!(is.na(metadata_updated$sample_name)),]
# Rearrange so that the touchdown replicates are first, and then drop the classic replicates
metadata_updated %>% arrange(.,-PCR_settings) %>% distinct(.,original_name,.keep_all = TRUE) -> metadata_updated
## Warning in Ops.factor(PCR_settings): '-' not meaningful for factors
# Change sample_name to row names
rownames(metadata_updated) <- NULL
metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
asv_table <- read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/12S/for_MBON/decontaminated/092120/12S_master_ASV_table.csv",row.names = 1, check.names = FALSE)
destroyX = function(es) {
f = es
for (col in c(1:ncol(f))){ #for each column in dataframe
if (startsWith(colnames(f)[col], "X") == TRUE) { #if starts with 'X' ..
colnames(f)[col] <- substr(colnames(f)[col], 2, 100) #get rid of it
}
}
assign(deparse(substitute(es)), f, inherits = TRUE) #assign corrected data to original name
}
asv_table <- destroyX(asv_table)
asv_table <- otu_table(asv_table,taxa_are_rows = TRUE)
# rownames(metadata_updated) <- NULL
# metadata_updated <- column_to_rownames(metadata_updated,"sample_name")
sample_data <- sample_data(metadata_updated)
tax_table <- tax_table(as.matrix(read.csv(file = "/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/12S/for_MBON/decontaminated/092120/12S_master_tax_table.csv",row.names = 1)))
master_phyloseq_allplates <- merge_phyloseq(asv_table,sample_data,tax_table)
master_phyloseq_allplates <- prune_samples(sample_data(master_phyloseq_allplates)$libraryID != "II",master_phyloseq_allplates)
master_phyloseq_allplates <- prune_samples(!(sample_data(master_phyloseq_allplates)$libraryID == "QQ" & sample_data(master_phyloseq_allplates)$PCR_settings == "classic"),master_phyloseq_allplates)
nsamples(master_phyloseq_allplates)
## [1] 775
ntaxa(master_phyloseq_allplates)
## [1] 29013
sum(sample_sums(master_phyloseq_allplates))
## [1] 27266169
#phyloseq_species <- tax_glom(master_phyloseq,taxrank=rank_names(phyloseq)[7], NArm = FALSE)
master_phyloseq_12S <- master_phyloseq_allplates %>%
subset_taxa(
!(Kingdom %in% c("Archaea","Bacteria"))
)
## Decontaminate
# no humans
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Hominidae"
)
# no cows
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Bovidae"
)
# no pigs
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Suidae"
)
# no dogs
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Canidae"
)
# no chickens
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Genus != "Gallus"
)
# no turkeys
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Genus != "Meleagris"
)
# no cats
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Order != "Carnivora"
)
# no rats!! and other rodents
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Family != "Felidae"
)
# no rabbits
master_phyloseq_12S <- master_phyloseq_12S %>%
subset_taxa(
Order != "Lagomorpha"
)
# Subset out KOSMOS samples
kosmos_12S_phyloseq <- prune_samples(sample_data(master_phyloseq_12S)$SAMPLING_project %in% c("KOSMOS"),master_phyloseq_12S)
kosmos_12S_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_phyloseq)>0,kosmos_12S_phyloseq)
kosmos_12S_16_phyloseq <- prune_samples(sample_data(kosmos_12S_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), kosmos_12S_phyloseq)
kosmos_12S_16_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_16_phyloseq)>0,kosmos_12S_16_phyloseq)
kosmos_12S_asv_table <- as.data.frame(as(otu_table(kosmos_12S_phyloseq),"matrix"))
rownames_to_column(kosmos_12S_asv_table,"ASV") -> kosmos_12S_asv_table
kosmos_12S_tax_table <- as.data.frame(as(tax_table(kosmos_12S_phyloseq),"matrix"))
rownames_to_column(kosmos_12S_tax_table,"ASV") -> kosmos_12S_tax_table
kosmos_12S <- left_join(kosmos_12S_asv_table, kosmos_12S_tax_table, by = "ASV")
# write.csv(kosmos_12S, "/Users/mmin/Documents/KOSMOS_data_local/kosmos_12S_072720.csv")
chloroplast_asv <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/kosmos_chloroplast_asv_table.csv",row.names = 1)
rownames(chloroplast_asv) <- NULL
chloroplast_asv %>% column_to_rownames(.,"asv") -> chloroplast_asv
kosmos_16S_metadata <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/KOSMOS_sample_data_16S.csv",row.names = 1)
chloroplast_tax_table <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/kosmos_chloroplast_tax_table_noX.csv", row.names = 1)
rownames(chloroplast_tax_table) <- NULL
chloroplast_tax_table %>% column_to_rownames(.,"asv") -> chloroplast_tax_table
chloroplast_phyloseq <- merge_phyloseq(otu_table(chloroplast_asv,taxa_are_rows = TRUE),tax_table(as.matrix(chloroplast_tax_table)),sample_data(kosmos_16S_metadata))
# Subset only M1 and Pacific samples
chloroplast_16_phyloseq <- prune_samples(sample_data(chloroplast_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), chloroplast_phyloseq)
# Drop all taxa with zero reads
chloroplast_16_phyloseq <- prune_taxa(taxa_sums(chloroplast_16_phyloseq)>0,chloroplast_16_phyloseq)
bacteria_tax_table <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/kosmos_bacteria_tax_table.csv", row.names = 1)
bacteria_tax_table$Kingdom <- as.character(bacteria_tax_table$Kingdom)
bacteria_tax_table$Phylum <- as.character(bacteria_tax_table$Phylum)
bacteria_tax_table$Class <- as.character(bacteria_tax_table$Class)
bacteria_tax_table$Order <- as.character(bacteria_tax_table$Order)
bacteria_tax_table$Family <- as.character(bacteria_tax_table$Family)
bacteria_tax_table$Genus <- as.character(bacteria_tax_table$Genus)
bacteria_tax_table$Species <- as.character(bacteria_tax_table$Species)
# Use a for loop to change all unwanted values to blank
unwanted_taxonomy <- c("uncultured","ambiguous","unidentified","metagenome")
columns <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
for (i in seq(1,length(unwanted_taxonomy))){
bacteria_tax_table$Species[grep(unwanted_taxonomy[i], bacteria_tax_table$Species,ignore.case = TRUE)] <- ""
bacteria_tax_table$Genus[grep(unwanted_taxonomy[i], bacteria_tax_table$Genus,ignore.case = TRUE)] <- ""
bacteria_tax_table$Family[grep(unwanted_taxonomy[i], bacteria_tax_table$Family,ignore.case = TRUE)] <- ""
bacteria_tax_table$Order[grep(unwanted_taxonomy[i], bacteria_tax_table$Order,ignore.case = TRUE)] <- ""
bacteria_tax_table$Class[grep(unwanted_taxonomy[i], bacteria_tax_table$Class,ignore.case = TRUE)] <- ""
bacteria_tax_table$Phylum[grep(unwanted_taxonomy[i], bacteria_tax_table$Phylum,ignore.case = TRUE)] <- ""
bacteria_tax_table$Kingdom[grep(unwanted_taxonomy[i], bacteria_tax_table$Kingdom,ignore.case = TRUE)] <- ""
}
bacteria_asv <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/kosmos_bacteria_asv_table.csv",row.names = 1)
# bacteria_asv %>% column_to_rownames(.,"asv") -> bacteria_asv
kosmos_16S_metadata <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/KOSMOS_sample_data_16S.csv",row.names = 1)
# bacteria_tax_table <- read.csv("/Users/mmin/Documents/KOSMOS_data_local/Worden_lab_data/kosmos_bacteria_tax_table.csv", row.names = 1)
bacteria_phyloseq <- merge_phyloseq(otu_table(bacteria_asv,taxa_are_rows = TRUE),tax_table(as.matrix(bacteria_tax_table)),sample_data(kosmos_16S_metadata))
# Subset out only M1 and Pacific samples
bacteria_16_phyloseq <- prune_samples(sample_data(bacteria_phyloseq)$SAMPLING_station_number %in% c("M1","MP"), bacteria_phyloseq)
# Remove all taxa that have zero reads
bacteria_16_phyloseq <- prune_taxa(taxa_sums(bacteria_16_phyloseq) > 0,bacteria_16_phyloseq)
bacteria_16_phyloseq_pro <- subset_taxa(bacteria_16_phyloseq, Genus == "Prochlorococcus_MIT9313")
bacteria_16_phyloseq_syn <- subset_taxa(bacteria_16_phyloseq, Genus %in% c("Synechococcus_PCC-7902","Synechococcus_CC9902"))
sample_sums(bacteria_16_phyloseq_pro)
## KOSMOS.D1.M1BS KOSMOS.D1.MPBS KOSMOS.D15.M1AS KOSMOS.D15.MPAS KOSMOS.D24.M1A
## 61 191 57 309 29
## KOSMOS.D24.MPAS KOSMOS.D32.M1AS KOSMOS.D32.MPAS KOSMOS.D36.M1AS KOSMOS.D36.MPAS
## 21 33 88 25 57
## KOSMOS.D42.M1AS KOSMOS.D42.MPAS KOSMOS.D48.M1AS KOSMOS.D48.MPAS KOSMOS.D8.M1AS
## 0 286 11 145 25
## KOSMOS.D8.MPAS
## 2577
sample_sums(bacteria_16_phyloseq_syn)
## KOSMOS.D1.M1BS KOSMOS.D1.MPBS KOSMOS.D15.M1AS KOSMOS.D15.MPAS KOSMOS.D24.M1A
## 23446 16508 1410 5798 760
## KOSMOS.D24.MPAS KOSMOS.D32.M1AS KOSMOS.D32.MPAS KOSMOS.D36.M1AS KOSMOS.D36.MPAS
## 2445 485 2013 112 3069
## KOSMOS.D42.M1AS KOSMOS.D42.MPAS KOSMOS.D48.M1AS KOSMOS.D48.MPAS KOSMOS.D8.M1AS
## 121 1825 609 2120 983
## KOSMOS.D8.MPAS
## 10254
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_COI_16_phyloseq)
ntaxa <- ntaxa(kosmos_COI_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_COI_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_COI_16_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_COI <- data.frame(statistic_names,statistic_values)
overview_statistics_COI
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 2434
## 3 Number of Reads 1219350
## 4 Phyla 30
## 5 Classes 64
## 6 Orders 131
## 7 Families 196
## 8 Genera 69
## 9 Species 54
COI_phyla <- unique(tax_table_all.df$Phylum)
COI_genera <- unique(tax_table_all.df$Genus)
kosmos_COI_M1_phyloseq <- prune_samples(sample_data(kosmos_COI_16_phyloseq)$SAMPLING_station_number == "M1", kosmos_COI_16_phyloseq)
kosmos_COI_M1_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_M1_phyloseq)>0,kosmos_COI_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_COI_M1_phyloseq)
ntaxa <- ntaxa(kosmos_COI_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_COI_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_COI_M1_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_COI_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_COI_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 1720
## 3 Number of Reads 625120
## 4 Phyla 25
## 5 Classes 54
## 6 Orders 105
## 7 Families 142
## 8 Genera 45
## 9 Species 37
# Calculate mean + SD
kosmos_COI_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_COI_M1_phyloseq))))
kosmos_COI_M1_sample_sums <- rownames_to_column(kosmos_COI_M1_sample_sums,"sample")
colnames(kosmos_COI_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_COI_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 78140 22861.3 8082.689 19112.52
kosmos_COI_MP_phyloseq <- prune_samples(sample_data(kosmos_COI_16_phyloseq)$SAMPLING_station_number == "MP", kosmos_COI_16_phyloseq)
kosmos_COI_MP_phyloseq <- prune_taxa(taxa_sums(kosmos_COI_MP_phyloseq)>0,kosmos_COI_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_COI_MP_phyloseq)
ntaxa <- ntaxa(kosmos_COI_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_COI_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_COI_MP_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_COI_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_COI_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 1890
## 3 Number of Reads 594230
## 4 Phyla 28
## 5 Classes 58
## 6 Orders 116
## 7 Families 169
## 8 Genera 56
## 9 Species 47
# Calculate mean + SD
kosmos_COI_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_COI_MP_phyloseq))))
kosmos_COI_MP_sample_sums <- rownames_to_column(kosmos_COI_MP_sample_sums,"sample")
colnames(kosmos_COI_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_COI_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 74278.75 6018.431 2127.837 5031.534
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_18S_16_phyloseq)
ntaxa <- ntaxa(kosmos_18S_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_18S_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_18S_16_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_18S <- data.frame(statistic_names,statistic_values)
overview_statistics_18S
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 3296
## 3 Number of Reads 1759671
## 4 Phyla 41
## 5 Classes 95
## 6 Orders 183
## 7 Families 254
## 8 Genera 269
## 9 Species 262
phyla_18S <- unique(tax_table_all.df$Phylum)
genera_18S <- unique(tax_table_all.df$Genus)
kosmos_18S_M1_phyloseq <- prune_samples(sample_data(kosmos_18S_16_phyloseq)$SAMPLING_station_number == "M1", kosmos_18S_16_phyloseq)
kosmos_18S_M1_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_M1_phyloseq)>0,kosmos_18S_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_18S_M1_phyloseq)
ntaxa <- ntaxa(kosmos_18S_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_18S_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_18S_M1_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_18S_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_18S_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 2105
## 3 Number of Reads 1006473
## 4 Phyla 31
## 5 Classes 76
## 6 Orders 139
## 7 Families 189
## 8 Genera 214
## 9 Species 196
# Calculate mean + SD
kosmos_18S_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_18S_M1_phyloseq))))
kosmos_18S_M1_sample_sums <- rownames_to_column(kosmos_18S_M1_sample_sums,"sample")
colnames(kosmos_18S_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_18S_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 125809.1 26365.08 9321.464 22041.76
kosmos_18S_MP_phyloseq <- prune_samples(sample_data(kosmos_18S_16_phyloseq)$SAMPLING_station_number == "MP", kosmos_18S_16_phyloseq)
kosmos_18S_MP_phyloseq <- prune_taxa(taxa_sums(kosmos_18S_MP_phyloseq)>0,kosmos_18S_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_18S_MP_phyloseq)
ntaxa <- ntaxa(kosmos_18S_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_18S_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_18S_MP_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_18S_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_18S_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 2825
## 3 Number of Reads 753198
## 4 Phyla 41
## 5 Classes 89
## 6 Orders 169
## 7 Families 226
## 8 Genera 241
## 9 Species 233
# Calculate mean + SD
kosmos_18S_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_18S_MP_phyloseq))))
kosmos_18S_MP_sample_sums <- rownames_to_column(kosmos_18S_MP_sample_sums,"sample")
colnames(kosmos_18S_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_18S_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 94149.75 19034.32 6729.649 15913.09
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_12S_16_phyloseq)
ntaxa <- ntaxa(kosmos_12S_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_12S_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for genus, thus now -3
ngenera <- length(unique(tax_table_all.df$Genus)) - 3
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, thus now -3
nspecies <- length(unique(tax_table_all.df$Species)) - 3
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_12S <- data.frame(statistic_names,statistic_values)
overview_statistics_12S
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 445
## 3 Number of Reads 216841
## 4 Phyla 0
## 5 Classes 2
## 6 Orders 21
## 7 Families 30
## 8 Genera 31
## 9 Species 25
phyla_12S <- unique(tax_table_all.df$Phylum)
genera_12S <- unique(tax_table_all.df$Genus)
kosmos_12S_M1_phyloseq <- prune_samples(sample_data(kosmos_12S_16_phyloseq)$SAMPLING_station_number == "M1", kosmos_12S_16_phyloseq)
kosmos_12S_M1_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_M1_phyloseq)>0,kosmos_12S_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_12S_M1_phyloseq)
ntaxa <- ntaxa(kosmos_12S_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_12S_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_12S_M1_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unassigned", "unknown", and "no_hit",
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, but there is a value for "bacterium, thus -4
nspecies <- length(unique(tax_table_all.df$Species)) - 4
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_12S_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_12S_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 348
## 3 Number of Reads 105635
## 4 Phyla -1
## 5 Classes 1
## 6 Orders 15
## 7 Families 21
## 8 Genera 23
## 9 Species 20
# Calculate mean + SD
kosmos_12S_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_12S_M1_phyloseq))))
kosmos_12S_M1_sample_sums <- rownames_to_column(kosmos_12S_M1_sample_sums,"sample")
colnames(kosmos_12S_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_12S_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 13204.38 9063.226 3204.334 7577.047
kosmos_12S_MP_phyloseq <- prune_samples(sample_data(kosmos_12S_16_phyloseq)$SAMPLING_station_number == "MP", kosmos_12S_16_phyloseq)
kosmos_12S_MP_phyloseq <- prune_taxa(taxa_sums(kosmos_12S_MP_phyloseq)>0,kosmos_12S_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(kosmos_12S_MP_phyloseq)
ntaxa <- ntaxa(kosmos_12S_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(kosmos_12S_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(kosmos_12S_MP_phyloseq),"matrix"))
# "unknown", "unassigned", and "no_hit" are unique values for taxonomy, thus -3 for number of unique counts
nphyla <- length(unique(tax_table_all.df$Phylum)) - 3
nclasses <- length(unique(tax_table_all.df$Class)) - 3
norders <- length(unique(tax_table_all.df$Order)) - 3
nfamilies <- length(unique(tax_table_all.df$Family)) - 3
# g_ is a unique value in addition to "unknown", "unassigned", and "no_hit", thus now -4
ngenera <- length(unique(tax_table_all.df$Genus)) - 4
# s_ is a unique value in addition to "unassigned", and "no_hit", but there is no "unknown value for species, but there is a value for "bacterium, thus -4
nspecies <- length(unique(tax_table_all.df$Species)) - 4
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_12S_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_12S_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 246
## 3 Number of Reads 111206
## 4 Phyla 0
## 5 Classes 2
## 6 Orders 20
## 7 Families 26
## 8 Genera 24
## 9 Species 18
# Calculate mean + SD
kosmos_12S_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(kosmos_12S_MP_phyloseq))))
kosmos_12S_MP_sample_sums <- rownames_to_column(kosmos_12S_MP_sample_sums,"sample")
colnames(kosmos_12S_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_12S_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 13900.75 6019.894 2128.354 5032.758
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(chloroplast_16_phyloseq)
ntaxa <- ntaxa(chloroplast_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(chloroplast_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(chloroplast_16_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_chloroplast <- data.frame(statistic_names,statistic_values)
overview_statistics_chloroplast
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 258
## 3 Number of Reads 241783
## 4 Phyla 7
## 5 Classes 14
## 6 Orders 16
## 7 Families 15
## 8 Genera 4
## 9 Species 5
chloroplast_phyla <- unique(tax_table_all.df$Phylum)
chloroplast_genera <- unique(tax_table_all.df$Genus)
chloroplast_M1_phyloseq <- prune_samples(sample_data(chloroplast_16_phyloseq)$SAMPLING_station_number == "M1", chloroplast_16_phyloseq)
chloroplast_M1_phyloseq <- prune_taxa(taxa_sums(chloroplast_M1_phyloseq)>0,chloroplast_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(chloroplast_M1_phyloseq)
ntaxa <- ntaxa(chloroplast_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(chloroplast_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(chloroplast_M1_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_chloroplast_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_chloroplast_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 158
## 3 Number of Reads 73050
## 4 Phyla 5
## 5 Classes 13
## 6 Orders 15
## 7 Families 14
## 8 Genera 2
## 9 Species 2
# Calculate mean + SD
kosmos_chloroplast_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(chloroplast_M1_phyloseq))))
kosmos_chloroplast_M1_sample_sums <- rownames_to_column(kosmos_chloroplast_M1_sample_sums,"sample")
colnames(kosmos_chloroplast_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_chloroplast_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 9131.25 6511.553 2302.182 5443.794
chloroplast_MP_phyloseq <- prune_samples(sample_data(chloroplast_16_phyloseq)$SAMPLING_station_number == "MP", chloroplast_16_phyloseq)
chloroplast_MP_phyloseq <- prune_taxa(taxa_sums(chloroplast_MP_phyloseq)>0,chloroplast_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(chloroplast_MP_phyloseq)
ntaxa <- ntaxa(chloroplast_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(chloroplast_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(chloroplast_MP_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_chloroplast_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_chloroplast_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 203
## 3 Number of Reads 168733
## 4 Phyla 7
## 5 Classes 13
## 6 Orders 16
## 7 Families 15
## 8 Genera 4
## 9 Species 5
# Calculate mean + SD
kosmos_chloroplast_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(chloroplast_MP_phyloseq))))
kosmos_chloroplast_MP_sample_sums <- rownames_to_column(kosmos_chloroplast_MP_sample_sums,"sample")
colnames(kosmos_chloroplast_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_chloroplast_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 21091.62 10737.76 3796.371 8976.992
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(bacteria_16_phyloseq)
ntaxa <- ntaxa(bacteria_16_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(bacteria_16_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(bacteria_16_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_bacteria <- data.frame(statistic_names,statistic_values)
overview_statistics_bacteria
## statistic_names statistic_values
## 1 Number of Samples 16
## 2 Number of ASVs 5786
## 3 Number of Reads 3125836
## 4 Phyla 38
## 5 Classes 74
## 6 Orders 174
## 7 Families 256
## 8 Genera 478
## 9 Species 16
bacteria_phyla <- unique(tax_table_all.df$Phylum)
bacteria_genera <- unique(tax_table_all.df$Genus)
bacteria_M1_phyloseq <- prune_samples(sample_data(bacteria_16_phyloseq)$SAMPLING_station_number == "M1", bacteria_16_phyloseq)
bacteria_M1_phyloseq <- prune_taxa(taxa_sums(bacteria_M1_phyloseq)>0,bacteria_M1_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(bacteria_M1_phyloseq)
ntaxa <- ntaxa(bacteria_M1_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(bacteria_M1_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(bacteria_M1_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_bacteria_M1 <- data.frame(statistic_names,statistic_values)
overview_statistics_bacteria_M1
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 3223
## 3 Number of Reads 1624665
## 4 Phyla 24
## 5 Classes 43
## 6 Orders 130
## 7 Families 183
## 8 Genera 348
## 9 Species 10
# Calculate mean + SD
kosmos_bacteria_M1_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(bacteria_M1_phyloseq))))
kosmos_bacteria_M1_sample_sums <- rownames_to_column(kosmos_bacteria_M1_sample_sums,"sample")
colnames(kosmos_bacteria_M1_sample_sums)[2] <- "reads"
summarySE(kosmos_bacteria_M1_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 203083.1 20875.4 7380.568 17452.27
bacteria_MP_phyloseq <- prune_samples(sample_data(bacteria_16_phyloseq)$SAMPLING_station_number == "MP", bacteria_16_phyloseq)
bacteria_MP_phyloseq <- prune_taxa(taxa_sums(bacteria_MP_phyloseq)>0,bacteria_MP_phyloseq)
statistic_names <- c("Number of Samples", "Number of ASVs","Number of Reads","Phyla","Classes","Orders","Families","Genera","Species")
nsamples <- nsamples(bacteria_MP_phyloseq)
ntaxa <- ntaxa(bacteria_MP_phyloseq)
nreads <- sum(colSums(as.data.frame(otu_table(bacteria_MP_phyloseq))))
tax_table_all.df <- as.data.frame(as(tax_table(bacteria_MP_phyloseq),"matrix"))
# a blank value is considered unique, thus -1 for all levels of taxonomy
nphyla <- length(unique(tax_table_all.df$Phylum)) - 1
nclasses <- length(unique(tax_table_all.df$Class)) - 1
norders <- length(unique(tax_table_all.df$Order)) - 1
nfamilies <- length(unique(tax_table_all.df$Family)) - 1
ngenera <- length(unique(tax_table_all.df$Genus)) - 1
nspecies <- length(unique(tax_table_all.df$Species)) -1
statistic_values <- c(nsamples,ntaxa,nreads,nphyla,nclasses,norders,nfamilies,ngenera,nspecies)
overview_statistics_bacteria_MP <- data.frame(statistic_names,statistic_values)
overview_statistics_bacteria_MP
## statistic_names statistic_values
## 1 Number of Samples 8
## 2 Number of ASVs 4139
## 3 Number of Reads 1501171
## 4 Phyla 38
## 5 Classes 72
## 6 Orders 166
## 7 Families 242
## 8 Genera 414
## 9 Species 8
# Calculate mean + SD
kosmos_bacteria_MP_sample_sums <- as.data.frame(colSums(as.data.frame(otu_table(bacteria_MP_phyloseq))))
kosmos_bacteria_MP_sample_sums <- rownames_to_column(kosmos_bacteria_MP_sample_sums,"sample")
colnames(kosmos_bacteria_MP_sample_sums)[2] <- "reads"
summarySE(kosmos_bacteria_MP_sample_sums,measurevar = "reads")
## .id N reads sd se ci
## 1 <NA> 8 187646.4 36331.09 12844.98 30373.55
COI_phyla <- as.character(COI_phyla)
phyla_18S <- as.character(phyla_18S)
phyla_12S <- as.character(phyla_12S)
chloroplast_phyla <- as.character(chloroplast_phyla)
bacteria_phyla <- as.character(bacteria_phyla)
# don't count 12S for now, unless we decide to include them in the paper
all_phyla <- c(COI_phyla, phyla_12S, phyla_18S, bacteria_phyla, chloroplast_phyla)
length(unique(all_phyla)) -4
## [1] 86
# unknown, blank, no_hit, and unassigned are all in here, thus -4
sort(unique(all_phyla))
## [1] "" "Acidobacteria"
## [3] "Actinobacteria" "Annelida"
## [5] "Apicomplexa" "Arthropoda"
## [7] "Ascomycota" "Bacillariophyta"
## [9] "Bacteroidetes" "Basidiomycota"
## [11] "Blastocladiomycota" "Brachiopoda"
## [13] "Bryozoa" "Calditrichaeota"
## [15] "Cercozoa" "Chaetognatha"
## [17] "Chloroflexi" "Chlorophyta"
## [19] "Chordata" "Chytridiomycota"
## [21] "Ciliophora" "Cloacimonetes"
## [23] "Cnidaria" "Cryptophyta"
## [25] "Ctenophora" "Cyanobacteria"
## [27] "Dadabacteria" "Deferribacteres"
## [29] "Deinococcus-Thermus" "Dependentiae"
## [31] "Discoba" "Discosea"
## [33] "Echinodermata" "Epsilonbacteraeota"
## [35] "Euglenida" "Euglenozoa"
## [37] "Fibrobacteres" "Firmicutes"
## [39] "Foraminifera" "Fusobacteria"
## [41] "Gastrotricha" "Gemmatimonadetes"
## [43] "Haptista" "Haptophyta"
## [45] "Hemichordata" "Heterolobosea"
## [47] "Hydrogenedentes" "Imbricatea"
## [49] "Kiritimatiellaeota" "Latescibacteria"
## [51] "Lentisphaerae" "Margulisbacteria"
## [53] "Marinimicrobia_(SAR406_clade)" "Microsporidia"
## [55] "Mollusca" "Mucoromycota"
## [57] "Nematoda" "Nemertea"
## [59] "Nitrospinae" "no_hit"
## [61] "Ochrophyta" "Omnitrophicaeota"
## [63] "Onychophora" "Patescibacteria"
## [65] "Phoronida" "Picozoa"
## [67] "Placozoa" "Planctomycetes"
## [69] "Platyhelminthes" "Poribacteria"
## [71] "Porifera" "Proteobacteria"
## [73] "Rappemonad" "Rhodophyta"
## [75] "Rokubacteria" "Rotifera"
## [77] "RsaHF231" "Spirochaetes"
## [79] "Streptophyta" "Synergistetes"
## [81] "Tenericutes" "Thermotogae"
## [83] "Tubulinea" "unassigned"
## [85] "unknown" "Verrucomicrobia"
## [87] "WOR-1" "WPS-2"
## [89] "Zixibacteria" "Zoopagomycota"
COI_genera <- as.character(COI_genera)
genera_18S <- as.character(genera_18S)
genera_12S <- as.character(genera_12S)
chloroplast_genera <- as.character(chloroplast_genera)
bacteria_genera <- as.character(bacteria_genera)
# don't count 12S for now, unless we decide to include them in the paper
all_genera <- c(COI_genera, genera_12S, genera_18S, bacteria_genera, chloroplast_genera)
# unknown, blank, no_hit, g_, and unassigned are all in here, thus -5
# Control 1 = MilliQ, Control 2 = RO
kosmos_COI_field_blanks <- prune_samples(sample_names(kosmos_COI_phyloseq) %in% c("KOSMOSD3_MQCBa_X","KOSMOSD3_ROCBa_X"),kosmos_COI_phyloseq)
kosmos_COI_field_blanks <- prune_taxa(taxa_sums(kosmos_COI_field_blanks) > 0, kosmos_COI_field_blanks)
kosmos_18S_field_blanks <- prune_samples(sample_names(kosmos_18S_phyloseq) %in% c("KOSMOSD3_MQCBa_UU","KOSMOSD3_ROCBa_UU"),kosmos_18S_phyloseq)
kosmos_18S_field_blanks <- prune_taxa(taxa_sums(kosmos_18S_field_blanks) > 0, kosmos_18S_field_blanks)
kosmos_12S_field_blanks <- prune_samples(sample_names(kosmos_12S_phyloseq) %in% c("KOSMOSD3_MQCBa_TT","KOSMOSD3_ROCBa_TT"),kosmos_12S_phyloseq)
kosmos_12S_field_blanks <- prune_taxa(taxa_sums(kosmos_12S_field_blanks) > 0, kosmos_12S_field_blanks)
kosmos_bacteria_field_blanks <- prune_samples(sample_names(bacteria_phyloseq) %in% c("KOSMOS.D3.control1","KOSMOS.D3.control2"),bacteria_phyloseq)
kosmos_bacteria_field_blanks <- prune_taxa(taxa_sums(kosmos_bacteria_field_blanks) > 0, kosmos_bacteria_field_blanks)
kosmos_chloroplast_field_blanks <- prune_samples(sample_names(chloroplast_phyloseq) %in% c("KOSMOS.D3.control1","KOSMOS.D3.control2"),chloroplast_phyloseq)
kosmos_chloroplast_field_blanks <- prune_taxa(taxa_sums(kosmos_chloroplast_field_blanks) > 0, kosmos_chloroplast_field_blanks)
kosmos_COI_field_blanks_class <- tax_glom(kosmos_COI_field_blanks,taxrank=rank_names(kosmos_COI_field_blanks)[3], NArm = FALSE)
kosmos_18S_field_blanks_class <- tax_glom(kosmos_18S_field_blanks,taxrank=rank_names(kosmos_18S_field_blanks)[3], NArm = FALSE)
kosmos_12S_field_blanks_class <- tax_glom(kosmos_12S_field_blanks,taxrank=rank_names(kosmos_12S_field_blanks)[3], NArm = FALSE)
kosmos_bacteria_field_blanks_class <- tax_glom(kosmos_bacteria_field_blanks,taxrank=rank_names(kosmos_bacteria_field_blanks)[3], NArm = FALSE)
kosmos_chloroplast_field_blanks_class <- tax_glom(kosmos_chloroplast_field_blanks,taxrank=rank_names(kosmos_chloroplast_field_blanks)[3], NArm = FALSE)
# taxa_sums(kosmos_COI_field_blanks)
# taxa_sums(kosmos_18S_field_blanks)
# taxa_sums(kosmos_12S_field_blanks)
# taxa_sums(kosmos_bacteria_field_blanks_class)
# taxa_sums(kosmos_chloroplast_field_blanks_class)
# Create a table that documents ASVs found in field blanks and their abundance vs their abundance in the environmental samples
kosmos_COI_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_COI_field_blanks),"matrix"))
kosmos_COI_field_blanks_tax_table <- rownames_to_column(kosmos_COI_field_blanks_tax_table,"ASV")
kosmos_COI_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_COI_field_blanks_tax_table
kosmos_COI_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_COI_field_blanks))
kosmos_COI_field_blanks_asv_table <- rownames_to_column(kosmos_COI_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_COI_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOSD3_MQCBa_X + KOSMOSD3_ROCBa_X)/(sum(kosmos_COI_field_blanks_asv_table$KOSMOSD3_MQCBa_X) + sum(kosmos_COI_field_blanks_asv_table$KOSMOSD3_ROCBa_X))) -> kosmos_COI_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_COI_16_asv_table <- as.data.frame(otu_table(kosmos_COI_16_phyloseq))
kosmos_COI_16_asv_table <- rownames_to_column(kosmos_COI_16_asv_table, "ASV")
kosmos_COI_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_COI_16_asv_table
kosmos_COI_envt_means <- kosmos_COI_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_COI_field_blanks_v_envt <- left_join(kosmos_COI_field_blanks_asv_table,kosmos_COI_envt_means, by = "ASV")
kosmos_COI_field_blanks_v_envt <- kosmos_COI_field_blanks_v_envt[order(-kosmos_COI_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_COI_field_blanks_v_envt <- left_join(kosmos_COI_field_blanks_v_envt, kosmos_COI_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_COI_field_blanks_v_envt, "/Users/mmin/Documents/KOSMOS_paper_figures/field_blanks/COI_field_blank_composition.csv")
kosmos_18S_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_18S_field_blanks),"matrix"))
kosmos_18S_field_blanks_tax_table <- rownames_to_column(kosmos_18S_field_blanks_tax_table,"ASV")
kosmos_18S_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_18S_field_blanks_tax_table
kosmos_18S_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_18S_field_blanks))
kosmos_18S_field_blanks_asv_table <- rownames_to_column(kosmos_18S_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_18S_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOSD3_MQCBa_UU + KOSMOSD3_ROCBa_UU)/(sum(kosmos_18S_field_blanks_asv_table$KOSMOSD3_MQCBa_UU) + sum(kosmos_18S_field_blanks_asv_table$KOSMOSD3_ROCBa_UU))) -> kosmos_18S_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_18S_16_asv_table <- as.data.frame(otu_table(kosmos_18S_16_phyloseq))
kosmos_18S_16_asv_table <- rownames_to_column(kosmos_18S_16_asv_table, "ASV")
kosmos_18S_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_18S_16_asv_table
kosmos_18S_envt_means <- kosmos_18S_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_18S_field_blanks_v_envt <- left_join(kosmos_18S_field_blanks_asv_table,kosmos_18S_envt_means, by = "ASV")
kosmos_18S_field_blanks_v_envt <- kosmos_18S_field_blanks_v_envt[order(-kosmos_18S_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_18S_field_blanks_v_envt <- left_join(kosmos_18S_field_blanks_v_envt, kosmos_18S_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_18S_field_blanks_v_envt, "/Users/mmin/Documents/KOSMOS_paper_figures/field_blanks/18S_field_blank_composition.csv")
kosmos_12S_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_12S_field_blanks),"matrix"))
kosmos_12S_field_blanks_tax_table <- rownames_to_column(kosmos_12S_field_blanks_tax_table,"ASV")
kosmos_12S_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_12S_field_blanks_tax_table
kosmos_12S_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_12S_field_blanks))
kosmos_12S_field_blanks_asv_table <- rownames_to_column(kosmos_12S_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_12S_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOSD3_MQCBa_TT + KOSMOSD3_ROCBa_TT)/(sum(kosmos_12S_field_blanks_asv_table$KOSMOSD3_MQCBa_TT) + sum(kosmos_12S_field_blanks_asv_table$KOSMOSD3_ROCBa_TT))) -> kosmos_12S_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_12S_16_asv_table <- as.data.frame(otu_table(kosmos_12S_16_phyloseq))
kosmos_12S_16_asv_table <- rownames_to_column(kosmos_12S_16_asv_table, "ASV")
kosmos_12S_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_12S_16_asv_table
kosmos_12S_envt_means <- kosmos_12S_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_12S_field_blanks_v_envt <- left_join(kosmos_12S_field_blanks_asv_table,kosmos_12S_envt_means, by = "ASV")
kosmos_12S_field_blanks_v_envt <- kosmos_12S_field_blanks_v_envt[order(-kosmos_12S_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_12S_field_blanks_v_envt <- left_join(kosmos_12S_field_blanks_v_envt, kosmos_12S_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_12S_field_blanks_v_envt, "/Users/mmin/Documents/KOSMOS_paper_figures/field_blanks/12S_field_blank_composition.csv")
kosmos_bacteria_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_bacteria_field_blanks),"matrix"))
kosmos_bacteria_field_blanks_tax_table <- rownames_to_column(kosmos_bacteria_field_blanks_tax_table,"ASV")
kosmos_bacteria_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_bacteria_field_blanks_tax_table
kosmos_bacteria_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_bacteria_field_blanks))
kosmos_bacteria_field_blanks_asv_table <- rownames_to_column(kosmos_bacteria_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_bacteria_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOS.D3.control1 + KOSMOS.D3.control2)/(sum(kosmos_bacteria_field_blanks_asv_table$KOSMOS.D3.control1) + sum(kosmos_bacteria_field_blanks_asv_table$KOSMOS.D3.control2))) -> kosmos_bacteria_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_bacteria_16_asv_table <- as.data.frame(otu_table(bacteria_16_phyloseq))
kosmos_bacteria_16_asv_table <- rownames_to_column(kosmos_bacteria_16_asv_table, "ASV")
kosmos_bacteria_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_bacteria_16_asv_table
kosmos_bacteria_envt_means <- kosmos_bacteria_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_bacteria_field_blanks_v_envt <- left_join(kosmos_bacteria_field_blanks_asv_table,kosmos_bacteria_envt_means, by = "ASV")
kosmos_bacteria_field_blanks_v_envt <- kosmos_bacteria_field_blanks_v_envt[order(-kosmos_bacteria_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_bacteria_field_blanks_v_envt <- left_join(kosmos_bacteria_field_blanks_v_envt, kosmos_bacteria_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_bacteria_field_blanks_v_envt, "/Users/mmin/Documents/KOSMOS_paper_figures/field_blanks/bacteria_field_blank_composition.csv")
kosmos_chloroplast_field_blanks_tax_table <- as.data.frame(as(tax_table(kosmos_chloroplast_field_blanks),"matrix"))
kosmos_chloroplast_field_blanks_tax_table <- rownames_to_column(kosmos_chloroplast_field_blanks_tax_table,"ASV")
kosmos_chloroplast_field_blanks_tax_table %>% mutate_all(as.character) -> kosmos_chloroplast_field_blanks_tax_table
kosmos_chloroplast_field_blanks_asv_table <- as.data.frame(otu_table(kosmos_chloroplast_field_blanks))
kosmos_chloroplast_field_blanks_asv_table <- rownames_to_column(kosmos_chloroplast_field_blanks_asv_table, "ASV")
# Calculate the average proportion of each ASV in the field blanks
kosmos_chloroplast_field_blanks_asv_table %>% mutate(., field_blank_mean_perc = (KOSMOS.D3.control1 + KOSMOS.D3.control2)/(sum(kosmos_chloroplast_field_blanks_asv_table$KOSMOS.D3.control1) + sum(kosmos_chloroplast_field_blanks_asv_table$KOSMOS.D3.control2))) -> kosmos_chloroplast_field_blanks_asv_table
# Calculate the average proportion of each ASV in the environmental samples
kosmos_chloroplast_16_asv_table <- as.data.frame(otu_table(chloroplast_16_phyloseq))
kosmos_chloroplast_16_asv_table <- rownames_to_column(kosmos_chloroplast_16_asv_table, "ASV")
kosmos_chloroplast_16_asv_table %>% mutate(., envt_mean_perc = (rowSums(.[,2:17]))/(sum(colSums(.[,2:17])))) -> kosmos_chloroplast_16_asv_table
kosmos_chloroplast_envt_means <- kosmos_chloroplast_16_asv_table[,c("ASV","envt_mean_perc")]
kosmos_chloroplast_field_blanks_v_envt <- left_join(kosmos_chloroplast_field_blanks_asv_table,kosmos_chloroplast_envt_means, by = "ASV")
kosmos_chloroplast_field_blanks_v_envt <- kosmos_chloroplast_field_blanks_v_envt[order(-kosmos_chloroplast_field_blanks_v_envt$field_blank_mean_perc),]
kosmos_chloroplast_field_blanks_v_envt <- left_join(kosmos_chloroplast_field_blanks_v_envt, kosmos_chloroplast_field_blanks_tax_table, by = "ASV")
write.csv(kosmos_chloroplast_field_blanks_v_envt, "/Users/mmin/Documents/KOSMOS_paper_figures/field_blanks/chloroplast_field_blank_composition.csv")
Note When running the DEICODE PCA, we are manually flipping the sign of our ordination results so that they all align nicely for the plot.
legend_df <- data.frame(c("Pacific","Mesocosm 1 (M1)"),c(1,1),c(0.8,1))
colnames(legend_df) <- c("station","position_x","position_y")
legend_gg <- ggplot(data = legend_df, aes(x = position_x, y = position_y, color = station, shape = station))+
geom_point(size = 14)+
geom_text(data = subset(legend_df,station == "Pacific"), aes(x = position_x+0.2, y = position_y+0.0175, color = station,label = station),hjust = 0, size = 18)+
geom_text(data = subset(legend_df,station == "Mesocosm 1 (M1)"), aes(x = position_x+0.2, y = position_y, color = station,label = station),hjust = 0, size = 18)+
scale_color_manual(name = "station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
theme(panel.background = element_rect(fill="white"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
plot.margin = margin(0, 0, 0, 0, "cm"),)+
ylim(0.7,1.1)+
xlim(0.9,3.5)
# annotate("text",label = "Mesocosm 1 (M1)", x = 31.5, y = 20.55,size = 6,adj = 0)+ #mesocosm
# annotate("text",label = "Pacific", x = 31.5, y = 20.35, size = 6, adj = 0)+ #pacific
legend_gg
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/PCA_legend_v2.png",legend_gg, width = 7, height = 2)
kosmos_COI_16_asv_table <- as.data.frame(otu_table(kosmos_COI_16_phyloseq))
kosmos_COI_16_asv_table <- rownames_to_column(kosmos_COI_16_asv_table,"#OTUID")
write.table(kosmos_COI_16_asv_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/COI/kosmos_COI_16_asv_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_COI_16_tax_table <- as.data.frame(as(tax_table(kosmos_COI_16_phyloseq),"matrix"))
kosmos_COI_16_tax_table <- rownames_to_column(kosmos_COI_16_tax_table,"#OTUID")
write.table(kosmos_COI_16_tax_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/COI/kosmos_COI_16_tax_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_COI_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_COI_16_phyloseq)))
kosmos_COI_16samples_metadata <- rownames_to_column(kosmos_COI_16samples_metadata,"#SampleID")
kosmos_COI_16samples_metadata <- kosmos_COI_16samples_metadata[,!(names(kosmos_COI_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M15","P15","M1","P1","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_COI_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(15,15,1,1,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_COI_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_COI_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_COI_16samples_metadata
# kosmos_COI_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_COI_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_COI_16samples_metadata$sample))) -> kosmos_COI_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_COI_16samples_metadata,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/COI/kosmos_COI_16_sample_data_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/COI/ordination.qza")
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (56.2%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (42.9%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (0.9%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_COI_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
# Manually change sign of PC1
pca_data$PC1 <- pca_data$PC1*-1
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("P32","P15","P8","P1","M24","M32")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("P42","M1","M48","M42","M36")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M8","P48")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("P24","M15","P36")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.3,size = 12)+
xlim(NA,max(pca_data$PC1)+0.03)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
ggtitle("COI")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (56.2%)"
## [1] "PC2 (42.9%)"
pca_gg_sample
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/COI_pca_kosmos_3PCs_16samples_9.25.20.png",pca_gg_sample,width = 7,height = 6)
kosmos_18S_16_asv_table <- as.data.frame(otu_table(kosmos_18S_16_phyloseq))
kosmos_18S_16_asv_table <- rownames_to_column(kosmos_18S_16_asv_table,"#OTUID")
write.table(kosmos_18S_16_asv_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/18S/kosmos_18S_16_asv_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_18S_16_tax_table <- as.data.frame(as(tax_table(kosmos_18S_16_phyloseq),"matrix"))
kosmos_18S_16_tax_table <- rownames_to_column(kosmos_18S_16_tax_table,"#OTUID")
write.table(kosmos_18S_16_tax_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/18S/kosmos_18S_16_tax_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_18S_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_18S_16_phyloseq)))
kosmos_18S_16samples_metadata <- rownames_to_column(kosmos_18S_16samples_metadata,"#SampleID")
kosmos_18S_16samples_metadata <- kosmos_18S_16samples_metadata[,!(names(kosmos_18S_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M15","P15","M1","P1","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_18S_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(15,15,1,1,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_18S_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_18S_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_18S_16samples_metadata
# kosmos_18S_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_18S_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_18S_16samples_metadata$sample))) -> kosmos_18S_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_18S_16samples_metadata,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/18S/kosmos_18S_16_sample_data_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/18S/ordination.qza")
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (60.8%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (37.3%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (2%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_18S_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
# Manually change signs of PC1 and PC2
pca_data$PC1 <- pca_data$PC1*-1
pca_data$PC2 <- pca_data$PC2*-1
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M48","P24", "M8", "P32","P8")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c()),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M1","P1","P15","P48","P36","M36","M32")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("M42","P42","M15","M24")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(b)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
ggtitle("18S rRNA")+
xlim(NA,max(pca_data$PC1)+0.03)
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (60.8%)"
## [1] "PC2 (37.3%)"
pca_gg_sample
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/18S_pca_kosmos_3PCs_16samples_9.25.20.png",pca_gg_sample,width = 7,height = 6)
kosmos_12S_16_asv_table <- as.data.frame(otu_table(kosmos_12S_16_phyloseq))
kosmos_12S_16_asv_table <- rownames_to_column(kosmos_12S_16_asv_table,"#OTUID")
write.table(kosmos_12S_16_asv_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/kosmos_12S_16_asv_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_16_tax_table <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq),"matrix"))
kosmos_12S_16_tax_table <- rownames_to_column(kosmos_12S_16_tax_table,"#OTUID")
write.table(kosmos_12S_16_tax_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/kosmos_12S_16_tax_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_12S_16_phyloseq)))
kosmos_12S_16samples_metadata <- rownames_to_column(kosmos_12S_16samples_metadata,"#SampleID")
kosmos_12S_16samples_metadata <- kosmos_12S_16samples_metadata[,!(names(kosmos_12S_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M15","P15","M1","P1","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_12S_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(15,15,1,1,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_12S_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_12S_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_12S_16samples_metadata
# kosmos_12S_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_12S_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_12S_16samples_metadata$sample))) -> kosmos_12S_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_12S_16samples_metadata,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/kosmos_12S_16_sample_data_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/ordination.qza")
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (59.2%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (31.6%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (9.3%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_12S_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M48","P24", "M8", "P32","P8","P42")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("P48")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M1","P15","P36","M36","M32","M15")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("M42","M24","P1")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(b)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
plot.title = element_text(size = 50, face = "bold", hjust = 0.5),
legend.position = "none")+
ggtitle("12S")+
xlim(NA,max(pca_data$PC1)+0.03)
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (59.2%)"
## [1] "PC2 (31.6%)"
pca_gg_sample
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/12S_pca_kosmos_3PCs_16samples_9.25.20.png",pca_gg_sample,width = 7,height = 5)
kosmos_12S_chordata_16_phyloseq <- subset_taxa(kosmos_12S_16_phyloseq, Phylum == "Chordata")
kosmos_12S_chordata_16_asv_table <- as.data.frame(otu_table(kosmos_12S_chordata_16_phyloseq))
kosmos_12S_chordata_16_asv_table <- rownames_to_column(kosmos_12S_chordata_16_asv_table,"#OTUID")
write.table(kosmos_12S_chordata_16_asv_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/chordata/kosmos_12S_chordata_16_asv_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_chordata_16_tax_table <- as.data.frame(as(tax_table(kosmos_12S_chordata_16_phyloseq),"matrix"))
kosmos_12S_chordata_16_tax_table <- rownames_to_column(kosmos_12S_chordata_16_tax_table,"#OTUID")
write.table(kosmos_12S_chordata_16_tax_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/chordata/kosmos_12S_chordata_16_tax_table_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_12S_chordata_16samples_metadata <- as.data.frame(as.matrix(sample_data(kosmos_12S_chordata_16_phyloseq)))
kosmos_12S_chordata_16samples_metadata <- rownames_to_column(kosmos_12S_chordata_16samples_metadata,"#SampleID")
kosmos_12S_chordata_16samples_metadata <- kosmos_12S_chordata_16samples_metadata[,!(names(kosmos_12S_chordata_16samples_metadata) %in% c("sample_name"))]
# Add a sample name column
sample <- c("M1","P1","M15","P15","M24","P24","M32","P32","M36","P36","M42","P42","M48","P48","M8","P8")
kosmos_12S_chordata_16samples_metadata["sample"] <- sample
# Add a day of experiment column
day_of_experiment <- c(1,1,15,15,24,24,32,32,36,36,42,42,48,48,8,8)
kosmos_12S_chordata_16samples_metadata["day_of_experiment"] <- day_of_experiment
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_12S_chordata_16samples_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_12S_chordata_16samples_metadata
# kosmos_12S_chordata_16samples_metadata %>% mutate(.,SAMPLING_day = regmatches(kosmos_12S_chordata_16samples_metadata$sample, gregexpr("[[:digit:]]+", kosmos_12S_chordata_16samples_metadata$sample))) -> kosmos_12S_chordata_16samples_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_12S_chordata_16samples_metadata,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/chordata/kosmos_12S_chordata_16_sample_data_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
pco <- read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/12S/chordata/ordination.qza")
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (57.5%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (39.4%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (3.2%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_12S_16samples_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = day_of_experiment))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M48","P24", "M8", "P8","P42")),aes(x=PC1+0.007,y=PC2+0.05,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("P1","P48")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M1","P36","M36","M32","M15")),aes(x=PC1+0.009,y=PC2-0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("M42","M24","P32","P15")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = day_of_experiment),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
# annotate("text", x=Inf, y = Inf, label = "(b)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
xlim(min(pca_data$PC1)-0.03,max(pca_data$PC1)+0.03)+
ggtitle("12S")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (57.5%)"
## [1] "PC2 (39.4%)"
pca_gg_sample
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/12S_pca_kosmos_chordata_3PCs_16samples_9.25.20.png",pca_gg_sample,width = 7,height = 6)
chloroplast_16_asv_table <- as.data.frame(otu_table(chloroplast_16_phyloseq))
chloroplast_16_asv_table <- rownames_to_column(chloroplast_16_asv_table,"#OTUID")
write.table(chloroplast_16_asv_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_plastidial/kosmos_chloroplast_asv_table_16samples_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
chloroplast_16_tax_table<- as.data.frame(as(tax_table(chloroplast_16_phyloseq),"matrix"))
chloroplast_16_tax_table <- rownames_to_column(chloroplast_16_tax_table,"#OTUID")
write.table(chloroplast_16_tax_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_plastidial/kosmos_chloroplast_tax_table_16samples_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_16S_16_metadata <- as.data.frame(as.matrix(sample_data(chloroplast_16_phyloseq)))
kosmos_16S_16_metadata <- rownames_to_column(kosmos_16S_16_metadata,"#SampleID")
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_16S_16_metadata$sample <- as.character(kosmos_16S_16_metadata$sample)
kosmos_16S_16_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_16S_16_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_16S_16_metadata,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_plastidial/KOSMOS_sample_data_16S_16samples_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
pco<-read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_plastidial/ordination.qza")
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (55.2%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (37.4%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (7.3%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_16S_16_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = SAMPLING_day))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("P42","P32","P15","P8","P1","M24","M36")),aes(x=PC1+0.007,y=PC2+0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("M1","M48","M42","M32")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("M8","P48")),aes(x=PC1+0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("P24","M15","P36")),aes(x=PC1-0.007,y=PC2+0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(c)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
xlim(min(pca_data$PC1)-0.03,max(pca_data$PC1)+0.03)+
ggtitle("16S Plastidial")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (55.2%)"
## [1] "PC2 (37.4%)"
pca_gg_sample
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/16S_plastidial_pca_kosmos_3PCs_16samples_9.25.20.png",pca_gg_sample,width = 7,height = 6)
bacteria_16_asv_table <- as.data.frame(otu_table(bacteria_16_phyloseq))
bacteria_16_asv_table <- rownames_to_column(bacteria_16_asv_table,"#OTUID")
write.table(bacteria_16_asv_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_bacterial/kosmos_bacteria_asv_table_16samples_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
bacteria_16_tax_table<- as.data.frame(as(tax_table(bacteria_16_phyloseq),"matrix"))
bacteria_16_tax_table <- rownames_to_column(bacteria_16_tax_table,"#OTUID")
write.table(bacteria_16_tax_table,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_bacterial/kosmos_bacteria_tax_table_16samples_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
kosmos_16S_16_metadata <- as.data.frame(as.matrix(sample_data(bacteria_16_phyloseq)))
kosmos_16S_16_metadata <- rownames_to_column(kosmos_16S_16_metadata,"#SampleID")
# Add a column for group (M1 after OMZ water addition, or Pacific + first two M1 samples)
kosmos_16S_16_metadata$sample <- as.character(kosmos_16S_16_metadata$sample)
kosmos_16S_16_metadata %>% mutate(.,group = ifelse(sample %in% c("M15","M24","M32","M36","M42","M48"),"M1_postOMZ","P_all_M1_preOMZ")) -> kosmos_16S_16_metadata
# metadata_12S <- metadata_12S %>%
# dplyr::select("X.SampleID", everything())
# metadata_12S %>% dplyr::rename("#SampleID" = X.SampleID) -> metadata_12S
write.table(kosmos_16S_16_metadata,"/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_bacterial/KOSMOS_sample_data_bacteria_16samples_for_biom.txt",sep = "\t",row.names = FALSE,quote = FALSE)
pco<-read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_bacterial/ordination.qza")
label.PC1 <- paste0("PC1 (", round(pco$data$ProportionExplained$PC1, 3)*100,"%)")
label.PC1
## [1] "PC1 (62%)"
label.PC2 <- paste0("PC2 (", round(pco$data$ProportionExplained$PC2, 3)*100,"%)")
label.PC2
## [1] "PC2 (37.2%)"
label.PC3 <- paste0("PC3 (", round(pco$data$ProportionExplained$PC3, 3)*100,"%)")
label.PC3
## [1] "PC3 (0.9%)"
## Prepare PCA data for ggplot
pca_metadata <- kosmos_16S_metadata
pca_metadata %>% rownames_to_column(.,"#SampleID") -> pca_metadata
pca_metadata %>% dplyr::rename(., "SampleID" = "#SampleID") -> pca_metadata
#pca_metadata$depth_cat <- factor(pca_metadata$depth_cat,levels = c("0-5","6-20","20-29","30-39","40-59","60-80","100-199","200-250","280-500","600-700"))
pca_data <- pco$data$Vectors
#pca_data <- left_join(pca_data,pca_metadata,on = "SampleID")
pca_data <- right_join(pca_data,pca_metadata,on = "SampleID")
## Joining, by = "SampleID"
pca_data <- subset(pca_data, !(is.na(pca_data$PC1)))
## Plot PCA in ggplot
pca_gg_sample <- ggplot(pca_data,aes(x=PC1,y=PC2,color = SAMPLING_station_number,shape = SAMPLING_station_number, label = SAMPLING_day))+
geom_point(size = 6)+
# Subset some samples to be above right
geom_text(data = subset(pca_data,sample %in% c("M8","P42","P15","P1","M36")),aes(x=PC1+0.007,y=PC2+0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be below left
geom_text(data = subset(pca_data,sample %in% c("M1","M42","P8","M24")),aes(x=PC1-0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# Subset some samples to be below right
geom_text(data = subset(pca_data,sample %in% c("P48")),aes(x=PC1+0.007,y=PC2-0.04,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 0,size = 10)+
# Subset some samples to be above left
geom_text(data = subset(pca_data,sample %in% c("P24","M15","P36","P32","M48","M32")),aes(x=PC1-0.008,y=PC2+0.05,color = SAMPLING_station_number,label = SAMPLING_day),segment.alpha = 0,show_guide = FALSE,label.padding = 1,point.padding = 0.3,box.padding = 0.1,hjust = 1,size = 10)+
# geom_text(hjust = 1, vjust=-0.5, segment.alpha = 0, show_guide = FALSE)+
xlab(print(label.PC1))+
ylab(print(label.PC2))+
scale_color_manual(name = "Station", values = c("#e41a1c","#377eb8"), labels = c("Mesocosm 1", "Pacific"))+
scale_shape_manual(name = "Station", values = c(19,17), labels = c("Mesocosm 1", "Pacific"))+
annotate("text", x=Inf, y = Inf, label = "(d)", vjust=1.5, hjust=1.3,size = 12)+
theme(panel.background = element_rect(fill = "white",color = "black",size = 1),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
axis.title = element_text(size = 20,face = "bold"),
axis.text = element_blank(),
axis.ticks.length=unit(0, "cm"),
plot.margin = margin(0.25, 0, 0.25, 0.25, "cm"),
legend.position = "none",
plot.title = element_text(size = 50, face = "bold", hjust = 0.5))+
xlim(min(pca_data$PC1)-0.03,max(pca_data$PC1)+0.03)+
ggtitle("16S Bacterial")
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## Warning: `show_guide` has been deprecated. Please use `show.legend` instead.
## Warning: Ignoring unknown parameters: segment.alpha, label.padding,
## point.padding, box.padding
## [1] "PC1 (62%)"
## [1] "PC2 (37.2%)"
pca_gg_sample
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/16S_bacterial_pca_kosmos_3PCs_16samples_9.25.20.png",pca_gg_sample,width = 7,height = 6)
group_colors = c("Rhodophytes" = "#E15759",
"Bicosoecids" = "#F28E2B",
"Dinoflagellates" = "#B6992D",
"Prymnesiales" = "#499894",
"Diatoms" = "#4E79A7",
"Chlorophyta" = "#59A14F",
"Oomycetes" = "#F1CE63",
"Coccolithophores" = "#A0CBE8",
"Amoebozoa" = "#FF9D9A",
"Cercozoa" = "#FFBE7D",
"Cryptophytes" = "#8CD17D",
"Embryophytes" = "#D7B5A6",
"Silicoflagellates" = "#86BCB6",
"Copepods" = "#BAB0AC",
"Rotifers" = "#79706E",
"Proteobacteria" = "#9D7660",
"Bacteroidetes" = "#D4A6C8",
"Actinobacteria" = "#FABFD2",
"Marinimicrobia" = "#6a3d9a",
"Verrucomicrobia" = "#D37295",
"Patescibacteria" = "#91003f",
"Cyanobacteria" = "#B07AA1",
"Spirotrichs" = "#1b7837",
"Unassigned" = "gray10")
# Make a fake dataframe with all of the different taxa that we want
allgroups <- c("Rhodophytes",
"Bicosoecids",
"Dinoflagellates",
"Prymnesiales",
"Diatoms",
"Chlorophyta",
"Oomycetes",
"Coccolithophores",
"Amoebozoa",
"Cercozoa",
"Spirotrichs",
"Cryptophytes",
"Embryophytes",
"Silicoflagellates",
"Copepods",
"Rotifers",
"Proteobacteria",
"Bacteroidetes",
"Actinobacteria",
"Verrucomicrobia",
"Cyanobacteria",
"Patescibacteria",
"Marinimicrobia",
"Unassigned")
fakedata <- rep(0.05,24)
fakesample <- rep("nosample",24)
legend_data <- data.frame(allgroups,fakedata,fakesample)
legend_data$allgroups <- factor(legend_data$allgroups, levels = c("Rhodophytes",
"Bicosoecids",
"Dinoflagellates",
"Prymnesiales",
"Diatoms",
"Chlorophyta",
"Oomycetes",
"Coccolithophores",
"Amoebozoa",
"Cercozoa",
"Spirotrichs",
"Cryptophytes",
"Embryophytes",
"Silicoflagellates",
"Copepods",
"Rotifers",
"Proteobacteria",
"Bacteroidetes",
"Actinobacteria",
"Verrucomicrobia",
"Cyanobacteria",
"Patescibacteria",
"Marinimicrobia",
"Unassigned"))
legend_gg<-ggplot(legend_data, aes(x=fakesample,y=fakedata,fill=allgroups))+
geom_bar(stat='identity')+theme_minimal()+
scale_fill_manual(values = group_colors)+
theme(axis.text.x = element_blank(),
legend.position = "right",
axis.title=element_text(size=8),
plot.margin = unit(c(0,5,0,0), "pt"),
legend.text=element_text(size=10),
legend.title = element_blank()
)+
guides(fill=guide_legend(ncol=8))+
xlab("Sample")+
ylab("Proportion of Reads")
legend_gg
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/combined_figure_legend_top30_092520.png",legend_gg,width = 21, height = 5)
# Make color scheme
group_colors = c("Rhodophytes" = "#E15759",
"Bicosoecids" = "#F28E2B",
"Dinoflagellates" = "#B6992D",
"Prymnesiales" = "#499894",
"Diatoms" = "#4E79A7",
"Chlorophyta" = "#59A14F",
"Oomycetes" = "#F1CE63",
"Coccolithophores" = "#A0CBE8",
"Amoebozoa" = "#FF9D9A",
"Cercozoa" = "#FFBE7D",
"Cryptophytes" = "#8CD17D",
"Embryophytes" = "#D7B5A6",
"Silicoflagellates" = "#86BCB6",
"Copepods" = "#BAB0AC",
"Rotifers" = "#79706E",
"Proteobacteria" = "#9D7660",
"Bacteroidetes" = "#D4A6C8",
"Actinobacteria" = "#FABFD2",
"Marinimicrobia" = "#6a3d9a",
"Verrucomicrobia" = "#D37295",
"Patescibacteria" = "#91003f",
"Cyanobacteria" = "#B07AA1",
"Spirotrichs" = "#1b7837",
"other" = "gray10")
pco <- read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/COI/ordination.qza")
PC_loadings_COI <- pco$data$Species
# Scaling PCA?
# max_PC1 <- max(abs(PC_loadings_COI$PC1))
# PC_loadings_COI %>% mutate(.,PC1_scaled = PC_loadings_COI$PC1/max_PC1) -> PC_loadings_COI
# kosmos_COI_16_tax_table
kosmos_COI_tax_table <- dplyr::rename(kosmos_COI_16_tax_table,FeatureID = "#OTUID")
PC_loadings_COI <- left_join(PC_loadings_COI,kosmos_COI_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_COI$Kingdom <- as.character(PC_loadings_COI$Kingdom)
PC_loadings_COI$Phylum <- as.character(PC_loadings_COI$Phylum)
PC_loadings_COI$Class <- as.character(PC_loadings_COI$Class)
PC_loadings_COI$Order <- as.character(PC_loadings_COI$Order)
PC_loadings_COI$Family <- as.character(PC_loadings_COI$Family)
PC_loadings_COI$Genus <- as.character(PC_loadings_COI$Genus)
PC_loadings_COI$Species <- as.character(PC_loadings_COI$Species)
# Add taxonomy field that shows the lowest level of taxonomy
PC_loadings_COI <- PC_loadings_COI[order(-PC_loadings_COI$PC1),]
PC_loadings_COI %>% mutate(.,taxonomy = ifelse(!(Species %in% c("unassigned","no_hit","s_","unknown")),Species,
# ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_COI$Genus, " sp."),
ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_COI$Genus),
ifelse(!(Family %in% c("unassigned","no_hit","unknown")),paste0("Family ", PC_loadings_COI$Family),
ifelse(!(Order %in% c("unassigned","no_hit","unknown")),paste0("Order ", PC_loadings_COI$Order),
ifelse(!(Class %in% c("unassigned","no_hit","unknown")),paste0("Class ", PC_loadings_COI$Class),
ifelse(!(Phylum %in% c("unassigned","no_hit","unknown")),paste0("Phylum ", PC_loadings_COI$Phylum),
"Unassigned"))))))) -> PC_loadings_COI
# Create new "Group" field that groups taxa together.
PC_loadings_COI %>% mutate(.,group =
# COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae"),"Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other")))))))))))))))))) -> PC_loadings_COI
#9D7660
#D4A6C8
#FABFD2
#BAB0AC
#F1CE63
unique(PC_loadings_COI$group)
## [1] "Rotifers" "Cercozoa" "Coccolithophores" "Oomycetes"
## [5] "other" "Amoebozoa" "Copepods" "Diatoms"
## [9] "Rhodophytes" "Prymnesiales" "Dinoflagellates" "Bicosoecids"
## [13] "Chlorophyta"
# Add in information from ASV_tophit_F_taxonomy table
tophit_taxonomy <- read.csv("/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/COI/X/ASV_tophit_Ftaxonomy.csv")
colnames(tophit_taxonomy)[1] <- "FeatureID"
colnames(tophit_taxonomy)[6] <- "precent_ID"
tophit_taxonomy <- tophit_taxonomy[,c(1:6,9)]
PC_loadings_COI <- left_join(PC_loadings_COI,tophit_taxonomy,by = "FeatureID")
#write.csv(PC_loadings_COI,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/PC_loadings_COI.csv")
### Normalize each sample to 1, have the proportion of reads be for each sample
kosmos_COI_asv_table <- as.data.frame(otu_table(kosmos_COI_16_phyloseq))
kosmos_COI_asv_table_matrix <- as.matrix(kosmos_COI_asv_table)
kosmos_COI_sample_sums <- as.vector(colSums(kosmos_COI_asv_table))
t(t(kosmos_COI_asv_table_matrix)/kosmos_COI_sample_sums) -> kosmos_COI_normalized_asv_table
kosmos_COI_normalized_asv_table <- as.data.frame(kosmos_COI_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
kosmos_COI_normalized_asv_table %>% rownames_to_column(.,"ASV") -> kosmos_COI_normalized_asv_table
kosmos_COI_normalized_asv_table %>% mutate(.,novercutoff = 0) -> kosmos_COI_normalized_asv_table
kosmos_COI_normalized_asv_table %>% column_to_rownames(.,"ASV") -> kosmos_COI_normalized_asv_table
for (i in seq(1:nrow(kosmos_COI_normalized_asv_table))){
kosmos_COI_normalized_asv_table$novercutoff[i] = sum(kosmos_COI_normalized_asv_table[i,] > 0.001)
}
kosmos_COI_normalized_asv_table_subset <- subset(kosmos_COI_normalized_asv_table, novercutoff >= 4)
kosmos_COI_normalized_asv_table_subset <- kosmos_COI_normalized_asv_table_subset[!(names(kosmos_COI_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(kosmos_COI_normalized_asv_table_subset)
PC_loadings_COI_subset <- subset(PC_loadings_COI, FeatureID %in% abundant_ASVs)
kosmos_COI_asv_table_for_supp <- rownames_to_column(kosmos_COI_asv_table, "FeatureID")
supp_table <- left_join(PC_loadings_COI_subset,kosmos_COI_asv_table_for_supp, by = "FeatureID")
# write.csv(supp_table,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/supplementary_table_1.csv")
# Plot all ASVs that are abundant (for supplemental figures)
COI_PCA_gg <- ggplot(PC_loadings_COI_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Class))+
geom_bar(stat = "identity")+
ylim(-0.25,0.25)+
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
annotate("segment", x = 15.5,xend = 15.5, y = 0, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = -0.2, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 8, y = -0.175,label = "Top 15 ASVs\n(negative)",size = 4.5,fontface = "bold")+
annotate("segment", x = 72.5,xend = 72.5, y = 0, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0.2, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 80, y = 0.175,label = "Top 15 ASVs\n(positive)",size = 4.5,fontface = "bold")+
# annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)+
scale_fill_tableau(palette = "Tableau 20")
COI_PCA_gg
# ggsave("/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/COI_PC1_unmerged_all_052220.png",COI_PCA_gg,width = 10, height = 7.5)
# Plot all taxa, top 15 on either side
PC_loadings_COI_30 <- PC_loadings_COI_subset[c(1:15, (nrow(PC_loadings_COI_subset)-14):nrow(PC_loadings_COI_subset)),]
COI_PCA_gg <- ggplot(PC_loadings_COI_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = group))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_COI_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black"),fontface = ifelse(!(Genus %in% c("g_","unassigned","no_hit")),"bold.italic","bold")), size = 5)+
ylim(-0.21,0.21)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(e)", vjust=1.5, hjust=1.25,size = 12)
COI_PCA_gg
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/COI_PC1_unmerged_top30_072720.png",COI_PCA_gg,width = 7, height = 8.5)
pco <- read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/18S/ordination.qza")
PC_loadings_18S <- pco$data$Species
# Scaling PCA?
# max_PC1 <- max(abs(PC_loadings_18S$PC1))
# PC_loadings_18S %>% mutate(.,PC1_scaled = PC_loadings_18S$PC1/max_PC1) -> PC_loadings_18S
# kosmos_18S_16_tax_table
kosmos_18S_tax_table <- dplyr::rename(kosmos_18S_16_tax_table,FeatureID = "#OTUID")
PC_loadings_18S <- left_join(PC_loadings_18S,kosmos_18S_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_18S$Kingdom <- as.character(PC_loadings_18S$Kingdom)
PC_loadings_18S$Phylum <- as.character(PC_loadings_18S$Phylum)
PC_loadings_18S$Class <- as.character(PC_loadings_18S$Class)
PC_loadings_18S$Order <- as.character(PC_loadings_18S$Order)
PC_loadings_18S$Family <- as.character(PC_loadings_18S$Family)
PC_loadings_18S$Genus <- as.character(PC_loadings_18S$Genus)
PC_loadings_18S$Species <- as.character(PC_loadings_18S$Species)
# Add taxonomy field that shows the lowest level of taxonomy
PC_loadings_18S <- PC_loadings_18S[order(-PC_loadings_18S$PC1),]
PC_loadings_18S %>% mutate(.,taxonomy = ifelse(!(Species %in% c("unassigned","no_hit","s_","unknown","uncultured marine picoeukaryote")),Species,
# ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_18S$Genus, " sp."),
ifelse(!(Genus %in% c("unassigned","no_hit","g_","unknown")),paste0(PC_loadings_18S$Genus),
ifelse(!(Family %in% c("unassigned","no_hit","unknown")),paste0("Family ", PC_loadings_18S$Family),
ifelse(!(Order %in% c("unassigned","no_hit","unknown")),paste0("Order ", PC_loadings_18S$Order),
ifelse(!(Class %in% c("unassigned","no_hit","unknown")),paste0("Class ", PC_loadings_18S$Class),
ifelse(!(Phylum %in% c("unassigned","no_hit","unknown")),paste0("Phylum ", PC_loadings_18S$Phylum),
"Unassigned"))))))) -> PC_loadings_18S
# Create new "Group" field that groups taxa together.
PC_loadings_18S %>% mutate(.,group =
# 18S & COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae") | Genus == "Protaspis","Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
ifelse(Class == "Spirotrichea", "Spirotrichs",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other"))))))))))))))))))) -> PC_loadings_18S
#9D7660
#D4A6C8
#FABFD2
#BAB0AC
#F1CE63
# unique(PC_loadings_18S$group)
# Add in information from ASV_tophit_F_taxonomy table
tophit_taxonomy <- read.csv("/Users/mmin/Documents/eDNA_meta_analysis_local_data/banzai_dada2/18S/UU/ASV_tophit_Ftaxonomy.csv")
colnames(tophit_taxonomy)[1] <- "FeatureID"
colnames(tophit_taxonomy)[6] <- "precent_ID"
tophit_taxonomy <- tophit_taxonomy[,c(1:6,9)]
PC_loadings_18S <- left_join(PC_loadings_18S,tophit_taxonomy,by = "FeatureID")
#write.csv(PC_loadings_18S,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/PC_loadings_18S.csv")
### Normalize each sample to 1, have the proportion of reads be for each sample
kosmos_18S_asv_table <- as.data.frame(otu_table(kosmos_18S_16_phyloseq))
kosmos_18S_asv_table_matrix <- as.matrix(kosmos_18S_asv_table)
kosmos_18S_sample_sums <- as.vector(colSums(kosmos_18S_asv_table))
t(t(kosmos_18S_asv_table_matrix)/kosmos_18S_sample_sums) -> kosmos_18S_normalized_asv_table
kosmos_18S_normalized_asv_table <- as.data.frame(kosmos_18S_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
kosmos_18S_normalized_asv_table %>% rownames_to_column(.,"ASV") -> kosmos_18S_normalized_asv_table
kosmos_18S_normalized_asv_table %>% mutate(.,novercutoff = 0) -> kosmos_18S_normalized_asv_table
kosmos_18S_normalized_asv_table %>% column_to_rownames(.,"ASV") -> kosmos_18S_normalized_asv_table
for (i in seq(1:nrow(kosmos_18S_normalized_asv_table))){
kosmos_18S_normalized_asv_table$novercutoff[i] = sum(kosmos_18S_normalized_asv_table[i,] > 0.001)
}
kosmos_18S_normalized_asv_table_subset <- subset(kosmos_18S_normalized_asv_table, novercutoff >= 4)
kosmos_18S_normalized_asv_table_subset <- kosmos_18S_normalized_asv_table_subset[!(names(kosmos_18S_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(kosmos_18S_normalized_asv_table_subset)
PC_loadings_18S_subset <- subset(PC_loadings_18S, FeatureID %in% abundant_ASVs)
kosmos_18S_asv_table_for_supp <- rownames_to_column(kosmos_18S_asv_table, "FeatureID")
supp_table <- left_join(PC_loadings_18S_subset,kosmos_18S_asv_table_for_supp, by = "FeatureID")
# write.csv(supp_table,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/supplementary_table_1.csv")
# Plot all ASVs that are abundant (for supplemental figures)
PCA_gg_18S <- ggplot(PC_loadings_18S_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Class))+
geom_bar(stat = "identity")+
ylim(-0.25,0.25)+
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
annotate("segment", x = 15.5,xend = 15.5, y = 0, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = -0.2, yend = -0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 0,xend = 15.5, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 8, y = -0.175,label = "Top 15 ASVs\n(negative)",size = 4.5,fontface = "bold")+
annotate("segment", x = 72.5,xend = 72.5, y = 0, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0.2, yend = 0.2, color = "black", size = 1, lty = 3)+
annotate("segment", x = 72.5,xend = 88, y = 0, yend = 0, color = "black", size = 1, lty = 3)+
annotate("text", x = 80, y = 0.175,label = "Top 15 ASVs\n(positive)",size = 4.5,fontface = "bold")+
# annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)+
scale_fill_tableau(palette = "Tableau 20")
PCA_gg_18S
# ggsave("/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/temp/18S_PC1_unmerged_all_052220.png",PCA_gg_18S,width = 10, height = 7.5)
# Plot all taxa, top 15 on either side
PC_loadings_18S_30 <- PC_loadings_18S_subset[c(1:15, (nrow(PC_loadings_18S_subset)-14):nrow(PC_loadings_18S_subset)),]
PCA_gg_18S <- ggplot(PC_loadings_18S_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = group))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_18S_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black"),fontface = ifelse(!(Genus %in% c("g_","unassigned","no_hit")),"bold.italic","bold")), size = 5)+
ylim(-0.21,0.21)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(f)", vjust=1.5, hjust=1.25,size = 12)
PCA_gg_18S
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/18S_PC1_unmerged_top30_092520.png",PCA_gg_18S,width = 7, height = 8.5)
pco<-read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_plastidial/ordination.qza")
PC_loadings_chloroplast <- pco$data$Species
kosmos_chloroplast_tax_table <- dplyr::rename(chloroplast_16_tax_table,FeatureID = "#OTUID")
PC_loadings_chloroplast <- left_join(PC_loadings_chloroplast,kosmos_chloroplast_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_chloroplast$Kingdom <- as.character(PC_loadings_chloroplast$Kingdom)
PC_loadings_chloroplast$Supergroup <- as.character(PC_loadings_chloroplast$Supergroup)
PC_loadings_chloroplast$Phylum <- as.character(PC_loadings_chloroplast$Phylum)
PC_loadings_chloroplast$Class <- as.character(PC_loadings_chloroplast$Class)
PC_loadings_chloroplast$Subclass <- as.character(PC_loadings_chloroplast$Subclass)
PC_loadings_chloroplast$Order <- as.character(PC_loadings_chloroplast$Order)
PC_loadings_chloroplast$Family <- as.character(PC_loadings_chloroplast$Family)
PC_loadings_chloroplast$Genus <- as.character(PC_loadings_chloroplast$Genus)
PC_loadings_chloroplast$Species <- as.character(PC_loadings_chloroplast$Species)
# Add new taxonomy column
PC_loadings_chloroplast <- PC_loadings_chloroplast[order(-PC_loadings_chloroplast$PC1),]
PC_loadings_chloroplast %>% mutate(.,taxonomy = ifelse(!(Species %in% c("")),Species,
ifelse(!(Genus %in% c("")),paste0(PC_loadings_chloroplast$Genus),
# ifelse(!(Genus %in% c("")),paste0(PC_loadings_chloroplast$Genus, " sp."),
ifelse(!(Family %in% c("")),paste0("Family ", PC_loadings_chloroplast$Family),
ifelse(!(Order %in% c("")),paste0("Order ", PC_loadings_chloroplast$Order),
ifelse(!(Subclass %in% c("")),paste0("Subclass ", PC_loadings_chloroplast$Subclass),
ifelse(!(Class %in% c("")),paste0("Class ", PC_loadings_chloroplast$Class),
ifelse(!(Phylum %in% c("")),paste0("Phylum ", PC_loadings_chloroplast$Phylum),
"Unassigned")))))))) -> PC_loadings_chloroplast
PC_loadings_chloroplast %>% mutate(.,group =
# COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Class %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae"),"Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyochophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Prymnesiales",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other"))))))))))))))))))) -> PC_loadings_chloroplast
### Normalize each sample to 1, have the proportion of reads be for each sample
chloroplast_asv_table <- as.data.frame(otu_table(chloroplast_16_phyloseq))
chloroplast_asv_table_matrix <- as.matrix(chloroplast_asv_table)
chloroplast_sample_sums <- as.vector(colSums(chloroplast_asv_table))
t(t(chloroplast_asv_table_matrix)/chloroplast_sample_sums) -> chloroplast_normalized_asv_table
chloroplast_normalized_asv_table <- as.data.frame(chloroplast_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
chloroplast_normalized_asv_table %>% mutate(.,novercutoff = 0) -> chloroplast_normalized_asv_table
for (i in seq(1:nrow(chloroplast_normalized_asv_table))){
chloroplast_normalized_asv_table$novercutoff[i] = sum(chloroplast_normalized_asv_table[i,] > 0.001)
}
chloroplast_normalized_asv_table_subset <- subset(chloroplast_normalized_asv_table, novercutoff >= 4)
chloroplast_normalized_asv_table_subset <- chloroplast_normalized_asv_table_subset[!(names(chloroplast_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(chloroplast_normalized_asv_table_subset)
PC_loadings_chloroplast_subset <- subset(PC_loadings_chloroplast, FeatureID %in% abundant_ASVs)
chloroplast_PCA_gg <- ggplot(PC_loadings_chloroplast_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Family))+
geom_bar(stat = "identity")+
ylim(-0.31,0.31)+
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
# scale_fill_manual(values = PC_loadings_colors)+
annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)
chloroplast_PCA_gg
# Plot top 15 positive and negative, all reads
PC_loadings_chloroplast_30 <- PC_loadings_chloroplast_subset[c(1:15, (nrow(PC_loadings_chloroplast_subset)-14):nrow(PC_loadings_chloroplast_subset)),]
chloroplast_PCA_gg <- ggplot(PC_loadings_chloroplast_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = group))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_chloroplast_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black")), size = 5,fontface = "bold")+
ylim(-0.4,0.4)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(g)", vjust=1.5, hjust=1.25,size = 12)
chloroplast_PCA_gg
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/16S_plastidial_PC1_unmerged_top30_092520.png",chloroplast_PCA_gg,width = 7, height = 8.5)
pco<-read_qza("/Users/mmin/Documents/KOSMOS_data_local/local_deicode_runs/FINAL/16S_bacterial/ordination.qza")
PC_loadings_bacteria <- pco$data$Species
kosmos_bacteria_tax_table <- dplyr::rename(bacteria_16_tax_table,FeatureID = "#OTUID")
PC_loadings_bacteria <- left_join(PC_loadings_bacteria,kosmos_bacteria_tax_table,by = "FeatureID")
# Change all taxonomic annotations to character
PC_loadings_bacteria$Kingdom <- as.character(PC_loadings_bacteria$Kingdom)
PC_loadings_bacteria$Phylum <- as.character(PC_loadings_bacteria$Phylum)
PC_loadings_bacteria$Class <- as.character(PC_loadings_bacteria$Class)
PC_loadings_bacteria$Order <- as.character(PC_loadings_bacteria$Order)
PC_loadings_bacteria$Family <- as.character(PC_loadings_bacteria$Family)
PC_loadings_bacteria$Genus <- as.character(PC_loadings_bacteria$Genus)
PC_loadings_bacteria$Species <- as.character(PC_loadings_bacteria$Species)
# Based on David's note, change the taxonomy of the "unassigned" bacterial ASV to Class Alphaproteobacteria, Phylum Proteobacteria, Kingdom Bacteria
PC_loadings_bacteria <- within(PC_loadings_bacteria, Kingdom[FeatureID == 'ec69e4a2fa57ecb7e50cf2e897b37a8e$Unassigned'] <- 'Bacteria')
PC_loadings_bacteria <- within(PC_loadings_bacteria, Phylum[FeatureID == 'ec69e4a2fa57ecb7e50cf2e897b37a8e$Unassigned'] <- 'Proteobacteria')
PC_loadings_bacteria <- within(PC_loadings_bacteria, Class[FeatureID == 'ec69e4a2fa57ecb7e50cf2e897b37a8e$Unassigned'] <- 'Alphaproteobacteria')
# Sort by PC1 loadings
PC_loadings_bacteria <- PC_loadings_bacteria[order(-PC_loadings_bacteria$PC1),]
PC_loadings_bacteria %>% mutate(.,taxonomy = ifelse(!(Species %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),Species,
ifelse(!(Genus %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0(PC_loadings_bacteria$Genus),
# ifelse(!(Genus %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0(PC_loadings_bacteria$Genus, " sp."),
ifelse(!(Family %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0("Family ", PC_loadings_bacteria$Family),
ifelse(!(Order %in% c("","uncultured_bacterium","uncultured_marine_bacterium","uncultured")),paste0("Order ", PC_loadings_bacteria$Order),
ifelse(!(Class %in% c("")),paste0("Class ", PC_loadings_bacteria$Class),
ifelse(!(Phylum %in% c("")),paste0("Phylum ", PC_loadings_bacteria$Phylum),
ifelse(!(Kingdom %in% c("","Unassigned")),paste0("Kingdom ", PC_loadings_bacteria$Kingdom),"Unassigned")))))))) -> PC_loadings_bacteria
PC_loadings_bacteria %>% mutate(.,group =
# COI
ifelse(Phylum %in% c("Bacillariophyta"), "Diatoms",
ifelse(Order %in% c("Isochrysidales"), "Coccolithophores",
ifelse(Class %in% c("Dinophyceae"),"Dinoflagellates",
ifelse(Phylum %in% c("Rhodophyta"),"Rhodophytes",
ifelse(Phylum %in% c("Rotifera"),"Rotifers",
ifelse(Family %in% c("Chrysochromulinaceae"),"Chrysochromulina",
ifelse(Class %in% c("Oomycetes"),"Oomycetes",
ifelse(Class %in% c("Hexanauplia"),"Copepods",
ifelse(Family %in% c("Cafeteriaceae"),"Bicosoecids",
ifelse(Family %in% c("Vexilliferidae"),"Amoebozoa",
ifelse(Family %in% c("Paulinellidae"),"Cercozoa",
ifelse(Family %in% c("Pycnococcaceae"),"Chlorophyta",
# Chloroplast
ifelse(Class %in% c("Cryptophyceae"),"Cryptophytes",
ifelse(Class %in% c("Embryophyceae"), "Embryophytes",
ifelse(Class %in% c("Dictyophyceae"), "Silicoflagellates",
ifelse(Family %in% c("Chrysochromulinaceae"),"Chrysochromulina",
ifelse(Class %in% c("Trebouxiophyceae"),"Chlorophyta",
"other")))))))))))))))))) -> PC_loadings_bacteria
# write.csv(PC_loadings_bacteria,"/Users/mmin/Documents/KOSMOS_data_local/PC_loadings_analysis/PC_loadings_bacteria.csv")
### Normalize each sample to 1, have the proportion of reads be for each sample
bacteria_asv_table <- as.data.frame(otu_table(bacteria_16_phyloseq))
bacteria_asv_table_matrix <- as.matrix(bacteria_asv_table)
bacteria_sample_sums <- as.vector(colSums(bacteria_asv_table))
t(t(bacteria_asv_table_matrix)/bacteria_sample_sums) -> bacteria_normalized_asv_table
bacteria_normalized_asv_table <- as.data.frame(bacteria_normalized_asv_table)
# Subset the ASV table based on a relative abundance threshold in multiple samples
# The problem with this approach is that it can eliminate taxa that don't have a lot of reads because of amplification bias, like Akashiwo
## Set threshold to at least 0.001 in at least 4 samples -> gives us 88 ASVs. Totally manageable. This is loose enough to keep Akashiwo in the picture.
# Add a column to the normalized asv table to store number of values over our cutoff (in this case 0.001)
bacteria_normalized_asv_table %>% mutate(.,novercutoff = 0) -> bacteria_normalized_asv_table
for (i in seq(1:nrow(bacteria_normalized_asv_table))){
bacteria_normalized_asv_table$novercutoff[i] = sum(bacteria_normalized_asv_table[i,] > 0.001)
}
bacteria_normalized_asv_table_subset <- subset(bacteria_normalized_asv_table, novercutoff >= 4)
bacteria_normalized_asv_table_subset <- bacteria_normalized_asv_table_subset[!(names(bacteria_normalized_asv_table_subset) %in% c("novercutoff"))]
# Save ASVs to keep as a list:
abundant_ASVs <- rownames(bacteria_normalized_asv_table_subset)
PC_loadings_bacteria_subset <- subset(PC_loadings_bacteria, FeatureID %in% abundant_ASVs)
bacteria_PCA_gg <- ggplot(PC_loadings_bacteria_subset,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Family))+
geom_bar(stat = "identity")+
ylim(-0.31,0.31)+
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.length.x=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text.y = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title.y = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
# scale_fill_manual(values = PC_loadings_colors)+
annotate("text", x=Inf, y = Inf, label = "(a)", vjust=1.5, hjust=1.25,size = 20)
bacteria_PCA_gg
# Plot top 15 positive and negative, all ASVs
PC_loadings_bacteria_30 <- PC_loadings_bacteria_subset[c(1:15, (nrow(PC_loadings_bacteria_subset)-14):nrow(PC_loadings_bacteria_subset)),]
# Change some labels
## Marinimicrobia
PC_loadings_bacteria_30 %>% mutate(.,Phylum = ifelse(Phylum == "Marinimicrobia_(SAR406_clade)","Marinimicrobia",
ifelse(Phylum == "","other", Phylum))) -> PC_loadings_bacteria_30
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Phylum Marinimicrobia_(SAR406_clade)","Phylum Marinimicrobia",taxonomy)) -> PC_loadings_bacteria_30
## Clade la
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Clade_Ia","SAR11 Ia",taxonomy)) -> PC_loadings_bacteria_30
## NS9_marine_group
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Family NS9_marine_group","NS9_marine_group",taxonomy)) -> PC_loadings_bacteria_30
## SAR86
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = ifelse(taxonomy == "Order SAR86_clade","SAR86 clade",taxonomy)) -> PC_loadings_bacteria_30
# Remove underscores from group labels
PC_loadings_bacteria_30 %>% mutate(.,taxonomy = gsub("_"," ",taxonomy)) -> PC_loadings_bacteria_30
bacteria_PCA_gg <- ggplot(PC_loadings_bacteria_30,aes(x = reorder(FeatureID,PC1),y = PC1, fill = Phylum))+
geom_bar(stat = "identity")+
coord_flip()+
geom_text(data = PC_loadings_bacteria_30,aes(x = reorder(FeatureID,PC1),y = ifelse(PC1 > 0, -0.01,0.01), label = taxonomy, hjust = ifelse(PC1 > 0, 1,0),color = ifelse(taxonomy =="Unassigned","gray60","black")),size = 5,fontface = ifelse(!(PC_loadings_bacteria_30$Genus %in% c("")|grepl("group",PC_loadings_bacteria_30$Genus)|grepl("clade",PC_loadings_bacteria_30$Genus,ignore.case = TRUE)),"bold.italic","bold"))+
ylim(-0.21,0.21)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.ticks.length.y=unit(0, "cm"),
panel.border = element_rect(colour = "black", fill=NA, size=1),
legend.position = "none",
axis.text.x = element_text(size = 15),
axis.ticks.length.x = unit(0.25,"cm"),
axis.title.x = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
plot.margin = margin(0, 0, 0, 0.95, "cm"),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank())+
scale_fill_manual(values = group_colors)+
scale_color_manual(values = c("black","gray60"))+
annotate("text", x=Inf, y = Inf, label = "(h)", vjust=1.5, hjust=1.25,size = 12)
bacteria_PCA_gg
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/PCA_figure/FINAL/16S_bacterial_PC1_unmerged_top30_092520.png",bacteria_PCA_gg,width = 7, height = 8.5)
kosmos_12S_sample_sums <- as.data.frame(sample_sums(kosmos_12S_16_phyloseq))
kosmos_12S_sample_sums <- rownames_to_column(kosmos_12S_sample_sums,"sample")
colnames(kosmos_12S_sample_sums)[2] <- "total_reads"
kosmos_12S_sample_sums %>% mutate(.,sample_name = substr(gsub("KOSMOS","",sample),1,nchar(gsub("KOSMOS","",sample))-5)) -> kosmos_12S_sample_sums
# Add a mesocosm vs. Pacific column
kosmos_12S_sample_sums$sample_name <- as.character(kosmos_12S_sample_sums$sample_name)
kosmos_12S_sample_sums %>% mutate(.,station = substr(kosmos_12S_sample_sums$sample_name,nchar(kosmos_12S_sample_sums$sample_name)-1,nchar(kosmos_12S_sample_sums$sample_name))) -> kosmos_12S_sample_sums
kosmos_12S_sample_sums$sample_name <- factor(kosmos_12S_sample_sums$sample_name, levels = c("D1_M1", "D1_MP", "D8_M1", "D8_MP", "D15_M1", "D15_MP", "D24_M1", "D24_MP", "D32_M1", "D32_MP", "D36_M1", "D36_MP", "D42_M1", "D42_MP", "D48_M1", "D48_MP"))
kosmos_12S_sample_sums_gg <- ggplot(kosmos_12S_sample_sums,aes(x = sample_name,y = total_reads,fill = station))+
geom_bar(stat = "identity")+
ggtitle("12S reads per sample")+
theme(axis.text.y = element_text(size = 15),
axis.text.x = element_text(size = 15,angle = 90),
axis.title = element_text(size = 18),
plot.title = element_text(size = 25))+
ylab("Total Reads")+
xlab("Sample")
kosmos_12S_sample_sums_gg
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/12S/12S_sample_sums.png",kosmos_12S_sample_sums_gg,width = 10, height = 6)
kosmos_12S_tax_table <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq),"matrix"))
kosmos_12S_tax_table <- rownames_to_column(kosmos_12S_tax_table,"ASV")
kosmos_12S_tax_table %>% mutate_all(as.character) -> kosmos_12S_tax_table
# Sum of reads by ASV
kosmos_12S_taxa_sums <- as.data.frame(taxa_sums(kosmos_12S_16_phyloseq))
kosmos_12S_taxa_sums <- rownames_to_column(kosmos_12S_taxa_sums,"ASV")
colnames(kosmos_12S_taxa_sums)[2] <- "total_reads"
kosmos_12S_taxa_sums <- left_join(kosmos_12S_taxa_sums, kosmos_12S_tax_table, by = "ASV")
kosmos_12S_taxa_sums <- kosmos_12S_taxa_sums[order(-kosmos_12S_taxa_sums$total_reads),]
# Let's group by taxonomy and sum
dplyr::select(kosmos_12S_taxa_sums,-ASV) %>% group_by(Kingdom,Phylum, Class,Order, Family, Genus, Species) %>% summarise_all(funs(sum)) -> kosmos_12S_taxa_sums_grouped
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
kosmos_12S_taxa_sums_grouped <- kosmos_12S_taxa_sums_grouped[order(-kosmos_12S_taxa_sums_grouped$total_reads),]
write.csv(kosmos_12S_taxa_sums_grouped,"/Users/mmin/Documents/KOSMOS_paper_figures/12S/kosmos_12S_taxa_sums.csv")
kosmos_12S_16_phyloseq_chordata <- subset_taxa(kosmos_12S_16_phyloseq, Phylum == "Chordata")
# Remove Paralichthys reads
kosmos_12S_16_phyloseq_chordata <- subset_taxa(kosmos_12S_16_phyloseq_chordata, Genus != "Paralichthys")
kosmos_12S_16_phyloseq_chordata_merged <- tax_glom(kosmos_12S_16_phyloseq_chordata,taxrank = rank_names(kosmos_12S_16_phyloseq_chordata)[7],NArm = FALSE)
kosmos_12S_chordata_merged_asv_table <- as.data.frame(otu_table(kosmos_12S_16_phyloseq_chordata_merged))
kosmos_12S_chordata_merged_asv_table <- rownames_to_column(kosmos_12S_chordata_merged_asv_table,"ASV")
kosmos_12S_chordata_merged_tax_table <- as.data.frame(as(tax_table(kosmos_12S_16_phyloseq_chordata_merged),"matrix"))
kosmos_12S_chordata_merged_tax_table <- rownames_to_column(kosmos_12S_chordata_merged_tax_table,"ASV")
kosmos_12S_chordata_merged_tax_table %>% mutate_all(as.character) -> kosmos_12S_chordata_merged_tax_table
kosmos_12S_chordata_merged_combined_table <- left_join(kosmos_12S_chordata_merged_asv_table,kosmos_12S_chordata_merged_tax_table,by = "ASV")
# Create a new taxonomy field, with best taxonomy (family or below)
kosmos_12S_chordata_merged_combined_table %>% mutate(.,taxonomy = ifelse(!(Species %in% c("s_","unassigned")),Species,
ifelse(!(Genus %in% c("g_","unassigned")),Genus,
ifelse(!(Family %in% c("unassigned")),Family,
ifelse(!(Order %in% c("unassigned")),Order,"unassigned"))))) -> kosmos_12S_chordata_merged_combined_table
# Sum of reads by taxonomy
kosmos_12S_chordata_merged_taxa_sums <- as.data.frame(taxa_sums(kosmos_12S_16_phyloseq_chordata_merged))
kosmos_12S_chordata_merged_taxa_sums <- rownames_to_column(kosmos_12S_chordata_merged_taxa_sums,"ASV")
colnames(kosmos_12S_chordata_merged_taxa_sums)[2] <- "total_reads"
kosmos_12S_chordata_merged_taxa_sums <- kosmos_12S_chordata_merged_taxa_sums[order(-kosmos_12S_chordata_merged_taxa_sums$total_reads),]
kosmos_12S_chordata_merged_taxa_sums <- left_join(kosmos_12S_chordata_merged_taxa_sums, kosmos_12S_chordata_merged_tax_table, by = "ASV")
# Determine top 19 taxa, so the rest can be "other
top19_kosmos_12S_taxa <- kosmos_12S_chordata_merged_taxa_sums$ASV[1:19]
kosmos_12S_chordata_merged_combined_table %>% mutate(.,taxonomy_updated = ifelse(ASV %in% top19_kosmos_12S_taxa,taxonomy,"other")) -> kosmos_12S_chordata_merged_combined_table
# Convert to long form
gathercols <- colnames(dplyr::select(kosmos_12S_chordata_merged_asv_table,-ASV))
kosmos_12S_chordata_merged_combined_table_long <- gather(kosmos_12S_chordata_merged_combined_table,key = "sample",value = "reads",gathercols,factor_key = TRUE)
# Shorten sample names
kosmos_12S_chordata_merged_combined_table_long$sample <- as.character(kosmos_12S_chordata_merged_combined_table_long$sample)
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,sample_name = substr(gsub("KOSMOS","",sample),1,nchar(gsub("KOSMOS","",sample))-5)) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,station = substr(kosmos_12S_chordata_merged_combined_table_long$sample_name,nchar(kosmos_12S_chordata_merged_combined_table_long$sample_name)-1,nchar(kosmos_12S_chordata_merged_combined_table_long$sample_name))) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long$sample_name <- factor(kosmos_12S_chordata_merged_combined_table_long$sample_name, levels = c("D1_M1", "D1_MP", "D8_M1", "D8_MP", "D15_M1", "D15_MP", "D24_M1", "D24_MP", "D32_M1", "D32_MP", "D36_M1", "D36_MP", "D42_M1", "D42_MP", "D48_M1", "D48_MP"))
# Create a new data frame with total reads for each sample in order to get one border around each bar
dplyr::select(kosmos_12S_chordata_merged_combined_table_long,-c(ASV,Kingdom,Phylum, Class,Order, Family, Genus, Species,taxonomy,taxonomy_updated,sample,station)) %>% group_by(sample_name) %>% summarise_all(funs(sum)) -> kosmos_12S_chordata_sample_sums
colnames(kosmos_12S_chordata_sample_sums)[2] <- "total_reads_per_sample"
# Add these sample sums to original data frame
kosmos_12S_chordata_merged_combined_table_long <- left_join(kosmos_12S_chordata_merged_combined_table_long,kosmos_12S_chordata_sample_sums,by = "sample_name")
# Make a day column
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,day = gsub("_.*","",sample_name)) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long$day <- factor(kosmos_12S_chordata_merged_combined_table_long$day,levels = c("D1","D8","D15","D24","D32","D36","D42","D48"))
# Re-order taxonomy_updated, so that other is last
kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated = factor(kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated, levels = c(sort(unique(kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated)[!unique(kosmos_12S_chordata_merged_combined_table_long$taxonomy_updated) == "other"]),"other"))
# Determine top 10 genera
# dplyr::select(kosmos_M1_12S_taxa_sums,-ASV) %>% group_by(Kingdom,Phylum, Class,Order, Family, Genus, Species) %>% summarise_all(funs(sum)) -> kosmos_M1_12S_taxa_sums_grouped
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,taxonomy_genus = ifelse(!(Genus %in% c("g_","unassigned")),Genus,
ifelse(!(Family %in% c("unassigned")),paste0("Family ",Family),"unassigned"))) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_taxa_sums %>% mutate(.,taxonomy_genus = ifelse(!(Genus %in% c("g_","unassigned")),Genus,
ifelse(!(Family %in% c("unassigned")),paste0("Family ",Family),"unassigned"))) -> kosmos_12S_chordata_merged_taxa_sums
dplyr::select(kosmos_12S_chordata_merged_taxa_sums,-c(ASV,Kingdom,Phylum, Class,Order, Genus, Family, Species)) %>% group_by(taxonomy_genus) %>% summarise_all(funs(sum)) -> kosmos_12S_chordata_genus_sums
kosmos_12S_chordata_genus_sums <- kosmos_12S_chordata_genus_sums[order(-kosmos_12S_chordata_genus_sums$total_reads),]
top12_genera <- kosmos_12S_chordata_genus_sums$taxonomy_genus[1:11]
kosmos_12S_chordata_merged_combined_table_long %>% mutate(.,genus_updated = ifelse(taxonomy_genus %in% top12_genera,taxonomy_genus,"other")) -> kosmos_12S_chordata_merged_combined_table_long
kosmos_12S_chordata_merged_combined_table_long$genus_updated <- factor(kosmos_12S_chordata_merged_combined_table_long$genus_updated, levels = c("Engraulis", "Family Sciaenidae", "Family Engraulidae", "Odontesthes", "Family Haemulidae", "Ethmidium", "Bairdiella", "Citharichthys", "Sardinops", "Scomber", "Mugil","other", "unassigned"))
inca_tern <- readPNG("/Users/mmin/Documents/KOSMOS_paper_figures/phylopic/inca_tern.png")
inca_tern_img <- rasterGrob(inca_tern, interpolate=TRUE)
pelican <- readPNG("/Users/mmin/Documents/KOSMOS_paper_figures/phylopic/pelican.png")
pelican_img <- rasterGrob(pelican, interpolate=TRUE)
kosmos_12S_barplot <- ggplot(subset(kosmos_12S_chordata_merged_combined_table_long,station == "M1"), aes(x = day, y = reads,fill = genus_updated))+
geom_bar(stat = "identity")+
scale_fill_tableau(palette = "Tableau 20")+
ylab("Total Reads")+
xlab("Mesocosm Sample")+
guides(color=FALSE,fill=guide_legend(title="Genus"))+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank(),
legend.position = "none")+
scale_y_continuous(expand = c(0, 0),lim = c(0,22000))+
annotate("text", x=-Inf, y = Inf, label = "(b)", vjust=1.5, hjust=-0.25,size = 12)+
# Add bird pictures from phylopic
annotation_custom(pelican_img, xmin=5.5, xmax=6.5, ymin= 18030, ymax=21030) +
annotation_custom(inca_tern_img, xmin=6.5, xmax=7.5, ymin= 6228, ymax=9228) +
annotation_custom(inca_tern_img, xmin=7.5, xmax=8.5, ymin= 16916, ymax=19916)
kosmos_12S_barplot
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/12S/12S_mesocosm_barplot_no_paralichthys_genus_plus_birds_090220.png",kosmos_12S_barplot,width = 8, height = 3.5)
kosmos_12S_barplot <- ggplot(subset(kosmos_12S_chordata_merged_combined_table_long,station == "MP"), aes(x = day, y = reads,fill = genus_updated))+
geom_bar(stat = "identity")+
scale_fill_tableau(palette = "Tableau 20")+
ylab("Total Reads")+
xlab("Pacific Sample")+
guides(color=FALSE,fill=guide_legend(title="Genus"))+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank(),
legend.position = "none")+
scale_y_continuous(expand = c(0, 0),lim = c(0,22000))+
annotate("text", x=-Inf, y = Inf, label = "(a)", vjust=1.5, hjust=-0.25,size = 12)
kosmos_12S_barplot
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/12S/12S_pacific_barplot_noparalichthys_genus.png",kosmos_12S_barplot,width = 8, height = 3.5)
fakedata <- rep(0.05,12)
fakesample <- rep("nosample",12)
legend_data <- data.frame(allgroups,fakedata,fakesample)
legend_data$allgroups <- factor(unique(kosmos_12S_chordata_merged_combined_table_long$genus_updated), levels = c("Engraulis", "Family Sciaenidae", "Family Engraulidae", "Odontesthes", "Family Haemulidae", "Ethmidium", "Bairdiella", "Citharichthys", "Sardinops", "Scomber", "Mugil","other", "unassigned"))
legend_gg<-ggplot(legend_data, aes(x=fakesample,y=fakedata,fill=allgroups))+
geom_bar(stat='identity')+theme_minimal()+
scale_fill_tableau(palette = "Tableau 20")+
theme(axis.text.x = element_blank(),
legend.position = "right",
axis.title=element_text(size=8),
plot.margin = unit(c(0,5,0,0), "pt"),
legend.text=element_text(size=10),
legend.title = element_blank()
)+
guides(fill=guide_legend(ncol=4))+
xlab("Sample")+
ylab("Proportion of Reads")
legend_gg
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/12S/12S_genus_barplot_legend.png",legend_gg,width = 8, height = 2)
kosmos_12S_M1_genus_phyloseq <- tax_glom(kosmos_12S_M1_phyloseq,taxrank=rank_names(kosmos_12S_M1_phyloseq)[6], NArm = FALSE)
kosmos_12S_M1_genus_asv_table <- as.data.frame(as(otu_table(kosmos_12S_M1_genus_phyloseq),"matrix"))
kosmos_12S_M1_genus_asv_table <- rownames_to_column(kosmos_12S_M1_genus_asv_table,"ASV")
kosmos_12S_M1_genus_tax_table <- as.data.frame(as(tax_table(kosmos_12S_M1_genus_phyloseq),"matrix"))
kosmos_12S_M1_genus_tax_table <- rownames_to_column(kosmos_12S_M1_genus_tax_table,"ASV")
kosmos_12S_M1_genus_combined_table <- left_join(kosmos_12S_M1_genus_asv_table, kosmos_12S_M1_genus_tax_table, by = "ASV")
# Convert to long form
gathercols <- colnames(dplyr::select(kosmos_12S_M1_genus_asv_table,-ASV))
kosmos_12S_M1_genus_combined_table_gather <- gather(kosmos_12S_M1_genus_combined_table, key = "sample", value = "reads", gathercols, factor_key=TRUE)
kosmos_12S_M1_genus_combined_table_gather$sample <- as.character(kosmos_12S_M1_genus_combined_table_gather$sample)
kosmos_12S_M1_genus_combined_table_gather %>% mutate(.,day = gsub("_.*","", gsub("KOSMOSD","",sample))) -> kosmos_12S_M1_genus_combined_table_gather
paralichthys_M1_long <- subset(kosmos_12S_M1_genus_combined_table_gather, Genus == "Paralichthys")
paralichthys_M1_long$sample <- factor(paralichthys_M1_long$sample, levels = c("KOSMOSD1_M1BS", "KOSMOSD8_M1AS", "KOSMOSD15_M1AS", "KOSMOSD24_M1AS", "KOSMOSD32_M1AS", "KOSMOSD36_M1AS", "KOSMOSD42_M1AS", "KOSMOSD48_M1AS"))
# paralichthys_M1_long$day <- factor(paralichthys_M1_long$day,levels = c("1","8","15","24","32","36","42","48"))
paralichthys_M1_long$day <- as.numeric(as.character(paralichthys_M1_long$day))
paralichthys_M1_long %>% mutate(.,sample_name = paste0("D",day)) -> paralichthys_M1_long
paralichthys_M1_long$sample_name <- factor(paralichthys_M1_long$sample_name,levels = c("D1","D8","D15","D24","D32","D36","D42","D48"))
paralichthys_M1_gg <- ggplot(paralichthys_M1_long, aes(x = sample_name, y = reads,fill = Genus))+
geom_bar(stat = 'identity',width=0.35)+
xlab("Mesocosm Sample")+
ylab("Total Reads")+
geom_vline(xintercept=4.75,color="#e41a1c",lty=2,size = 1.2)+
scale_fill_manual(values = c("black"))+
scale_y_continuous(expand = c(0, 0),lim = c(0,8500))+
# annotate(geom = "text",x = 4.65, y = 4250,angle = 90, color = "#e41a1c", size = 2, label = expression(paste(italic("Paralichthys adspersus")," eggs added on day 31")))+
annotate(geom = "text",x = 4.38, y = 4250,angle = 90, color = "#e41a1c", size = 6, label = expression(paste(italic("Paralichthys adspersus"))))+
annotate(geom = "text",x = 4.57, y = 4250,angle = 90, color = "#e41a1c", size = 6, label = "eggs added on day 31")+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
# legend.position = "none",
axis.text = element_text(size = 15),
axis.ticks.length.y = unit(0.25,"cm"),
axis.title = element_text(size = 20),
panel.background = element_rect(fill = "white",color = "white",size = 1),
panel.grid.major.x = element_blank(),
# panel.grid.major.y = element_line(size = 1, color = "gray70"),
panel.grid.minor = element_blank(),
legend.position = "none")+
annotate("text", x=-Inf, y = Inf, label = "(c)", vjust=1.5, hjust=-0.25,size = 12)+
annotate("rect",xmin = 1, xmax = 1.35, ymin = 5500, ymax = 6250,fill = "black",color = "black")+
annotate("text", x = 1.45, y = 5875, label = "Paralichthys",hjust = 0,size = 6)
paralichthys_M1_gg
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
ggsave("/Users/mmin/Documents/KOSMOS_paper_figures/12S/paralichthys_M1.png",paralichthys_M1_gg,width = 8, height = 4)
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'